This source file includes the following definitions.
- NBC_Error
- nbc_get_round_size
- nbc_schedule_get_size
- nbc_schedule_inc_size
- nbc_schedule_inc_round
- nbc_get_noop_request
- NBC_DEBUG
- NBC_Type_intrinsic
- NBC_Copy
- NBC_Unpack
- NBC_SchedCache_dictwipe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 #ifndef __NBC_INTERNAL_H__
23 #define __NBC_INTERNAL_H__
24 #include "ompi_config.h"
25
26
27 #define NBC_F77_FUNC_ F77_FUNC_
28
29 #include "mpi.h"
30
31 #include "coll_libnbc.h"
32 #if OPAL_CUDA_SUPPORT
33 #include "opal/datatype/opal_convertor.h"
34 #include "opal/datatype/opal_datatype_cuda.h"
35 #endif
36 #include "ompi/include/ompi/constants.h"
37 #include "ompi/request/request.h"
38 #include "ompi/datatype/ompi_datatype.h"
39 #include "ompi/communicator/communicator.h"
40
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <stdarg.h>
44 #include <assert.h>
45 #include <math.h>
46 #include <string.h>
47 #include "libdict/dict.h"
48
49 #ifdef __cplusplus
50 extern "C" {
51 #endif
52
53
54 #define LOG2 0.69314718055994530941
55
56
57 #define true 1
58 #define false 0
59
60
61 #define NBC_ALLGATHER 0
62 #define NBC_ALLGATHERV 1
63 #define NBC_ALLREDUCE 2
64 #define NBC_ALLTOALL 3
65 #define NBC_ALLTOALLV 4
66 #define NBC_ALLTOALLW 5
67 #define NBC_BARRIER 6
68 #define NBC_BCAST 7
69 #define NBC_EXSCAN 8
70 #define NBC_GATHER 9
71 #define NBC_GATHERV 10
72 #define NBC_REDUCE 11
73 #define NBC_REDUCESCAT 12
74 #define NBC_SCAN 13
75 #define NBC_SCATTER 14
76 #define NBC_SCATTERV 15
77
78
79
80
81
/* Tag identifying the kind of operation stored in a schedule entry.
 * The numeric values are spelled out; they are the same values the
 * compiler would assign implicitly (0, 1, 2, ...). */
typedef enum {
  SEND   = 0,
  RECV   = 1,
  OP     = 2,
  COPY   = 3,
  UNPACK = 4
} NBC_Fn_type;
89
90
/* Schedule entry describing a send.
 * `type` is the first member: nbc_get_round_size() reads an NBC_Fn_type from
 * the start of every entry and skips sizeof(NBC_Args_send) bytes for SEND.
 * The other members correspond to the parameters of NBC_Sched_send() and
 * NBC_Sched_local_send(). */
typedef struct {
  NBC_Fn_type type;       /* entry tag, read first when walking a round */
  int count;              /* element count */
  const void *buf;        /* data to send */
  MPI_Datatype datatype;  /* datatype of the elements */
  int dest;               /* destination */
  char tmpbuf;            /* NOTE(review): presumably flags that buf refers to a temporary buffer - confirm */
  bool local;             /* NOTE(review): presumably distinguishes the *_local_* scheduling variant - confirm */
} NBC_Args_send;
100
101
/* Schedule entry describing a receive.
 * `type` is the first member: nbc_get_round_size() reads an NBC_Fn_type from
 * the start of every entry and skips sizeof(NBC_Args_recv) bytes for RECV.
 * The other members correspond to the parameters of NBC_Sched_recv() and
 * NBC_Sched_local_recv(). */
typedef struct {
  NBC_Fn_type type;       /* entry tag, read first when walking a round */
  int count;              /* element count */
  void *buf;              /* receive buffer */
  MPI_Datatype datatype;  /* datatype of the elements */
  char tmpbuf;            /* NOTE(review): presumably flags that buf refers to a temporary buffer - confirm */
  int source;             /* source */
  bool local;             /* NOTE(review): presumably distinguishes the *_local_* scheduling variant - confirm */
} NBC_Args_recv;
111
112
/* Schedule entry describing an operation on two buffers.
 * `type` is the first member: nbc_get_round_size() reads an NBC_Fn_type from
 * the start of every entry and skips sizeof(NBC_Args_op) bytes for OP.
 * The other members correspond to the parameters of NBC_Sched_op(). */
typedef struct {
  NBC_Fn_type type;       /* entry tag, read first when walking a round */
  char tmpbuf1;           /* NOTE(review): presumably flags that buf1 refers to a temporary buffer - confirm */
  char tmpbuf2;           /* NOTE(review): presumably flags that buf2 refers to a temporary buffer - confirm */
  const void *buf1;       /* first operand (read-only) */
  void *buf2;             /* second operand (writable) */
  MPI_Op op;              /* operation handle */
  MPI_Datatype datatype;  /* datatype of the elements */
  int count;              /* element count */
} NBC_Args_op;
123
124
/* Schedule entry describing a copy from a source to a target buffer.
 * `type` is the first member: nbc_get_round_size() reads an NBC_Fn_type from
 * the start of every entry and skips sizeof(NBC_Args_copy) bytes for COPY.
 * The other members correspond to the parameters of NBC_Sched_copy(). */
typedef struct {
  NBC_Fn_type type;       /* entry tag, read first when walking a round */
  int srccount;           /* element count on the source side */
  void *src;              /* source buffer */
  void *tgt;              /* target buffer */
  MPI_Datatype srctype;   /* datatype of the source elements */
  MPI_Datatype tgttype;   /* datatype of the target elements */
  int tgtcount;           /* element count on the target side */
  char tmpsrc;            /* NOTE(review): presumably flags that src refers to a temporary buffer - confirm */
  char tmptgt;            /* NOTE(review): presumably flags that tgt refers to a temporary buffer - confirm */
} NBC_Args_copy;
136
137
/* Schedule entry describing an unpack from an input to an output buffer.
 * `type` is the first member: nbc_get_round_size() reads an NBC_Fn_type from
 * the start of every entry and skips sizeof(NBC_Args_unpack) bytes for UNPACK.
 * The other members correspond to the parameters of NBC_Sched_unpack(). */
typedef struct {
  NBC_Fn_type type;       /* entry tag, read first when walking a round */
  int count;              /* element count */
  void *inbuf;            /* input buffer */
  void *outbuf;           /* output buffer */
  MPI_Datatype datatype;  /* datatype of the elements */
  char tmpinbuf;          /* NOTE(review): presumably flags that inbuf refers to a temporary buffer - confirm */
  char tmpoutbuf;         /* NOTE(review): presumably flags that outbuf refers to a temporary buffer - confirm */
} NBC_Args_unpack;
147
148
149 int NBC_Sched_send (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier);
150 int NBC_Sched_local_send (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest,NBC_Schedule *schedule, bool barrier);
151 int NBC_Sched_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier);
152 int NBC_Sched_local_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier);
153 int NBC_Sched_op (const void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype,
154 MPI_Op op, NBC_Schedule *schedule, bool barrier);
155 int NBC_Sched_copy (void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount,
156 MPI_Datatype tgttype, NBC_Schedule *schedule, bool barrier);
157 int NBC_Sched_unpack (void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf,
158 NBC_Schedule *schedule, bool barrier);
159
160 int NBC_Sched_barrier (NBC_Schedule *schedule);
161 int NBC_Sched_commit (NBC_Schedule *schedule);
162
163 #ifdef NBC_CACHE_SCHEDULE
164
165
166
167
168 struct NBC_dummyarg {
169 NBC_Schedule *schedule;
170 };
171
172 typedef struct {
173 NBC_Schedule *schedule;
174 void *sendbuf;
175 int sendcount;
176 MPI_Datatype sendtype;
177 void* recvbuf;
178 int recvcount;
179 MPI_Datatype recvtype;
180 } NBC_Alltoall_args;
181 int NBC_Alltoall_args_compare(NBC_Alltoall_args *a, NBC_Alltoall_args *b, void *param);
182
183 typedef struct {
184 NBC_Schedule *schedule;
185 void *sendbuf;
186 int sendcount;
187 MPI_Datatype sendtype;
188 void* recvbuf;
189 int recvcount;
190 MPI_Datatype recvtype;
191 } NBC_Allgather_args;
192 int NBC_Allgather_args_compare(NBC_Allgather_args *a, NBC_Allgather_args *b, void *param);
193
194 typedef struct {
195 NBC_Schedule *schedule;
196 void *sendbuf;
197 void* recvbuf;
198 int count;
199 MPI_Datatype datatype;
200 MPI_Op op;
201 } NBC_Allreduce_args;
202 int NBC_Allreduce_args_compare(NBC_Allreduce_args *a, NBC_Allreduce_args *b, void *param);
203
204 typedef struct {
205 NBC_Schedule *schedule;
206 void *buffer;
207 int count;
208 MPI_Datatype datatype;
209 int root;
210 } NBC_Bcast_args;
211 int NBC_Bcast_args_compare(NBC_Bcast_args *a, NBC_Bcast_args *b, void *param);
212
213 typedef struct {
214 NBC_Schedule *schedule;
215 void *sendbuf;
216 int sendcount;
217 MPI_Datatype sendtype;
218 void* recvbuf;
219 int recvcount;
220 MPI_Datatype recvtype;
221 int root;
222 } NBC_Gather_args;
223 int NBC_Gather_args_compare(NBC_Gather_args *a, NBC_Gather_args *b, void *param);
224
225 typedef struct {
226 NBC_Schedule *schedule;
227 void *sendbuf;
228 void* recvbuf;
229 int count;
230 MPI_Datatype datatype;
231 MPI_Op op;
232 int root;
233 } NBC_Reduce_args;
234 int NBC_Reduce_args_compare(NBC_Reduce_args *a, NBC_Reduce_args *b, void *param);
235
236 typedef struct {
237 NBC_Schedule *schedule;
238 void *sendbuf;
239 void* recvbuf;
240 int count;
241 MPI_Datatype datatype;
242 MPI_Op op;
243 } NBC_Scan_args;
244 int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param);
245
246 typedef struct {
247 NBC_Schedule *schedule;
248 void *sendbuf;
249 int sendcount;
250 MPI_Datatype sendtype;
251 void* recvbuf;
252 int recvcount;
253 MPI_Datatype recvtype;
254 int root;
255 } NBC_Scatter_args;
256 int NBC_Scatter_args_compare(NBC_Scatter_args *a, NBC_Scatter_args *b, void *param);
257
258
259 void NBC_SchedCache_args_delete(void *entry);
260 void NBC_SchedCache_args_delete_key_dummy(void *k);
261
262 #endif
263
264
265 int NBC_Start(NBC_Handle *handle);
266 int NBC_Schedule_request(NBC_Schedule *schedule, ompi_communicator_t *comm,
267 ompi_coll_libnbc_module_t *module, bool persistent,
268 ompi_request_t **request, void *tmpbuf);
269 void NBC_Return_handle(ompi_coll_libnbc_request_t *request);
270 static inline int NBC_Type_intrinsic(MPI_Datatype type);
271 int NBC_Create_fortran_handle(int *fhandle, NBC_Handle **handle);
272
273
274
/**
 * Print a printf-style diagnostic to stderr, followed by a newline.
 *
 * @param format  printf-style format string.  It is only read, so it is
 *                const-qualified; callers holding a `const char *` can pass
 *                it without a cast.
 * @param ...     arguments consumed by @p format
 */
static inline void NBC_Error (const char *format, ...) {
  va_list ap;

  va_start (ap, format);
  vfprintf (stderr, format, ap);
  va_end (ap);

  /* terminate the message line */
  fputc ('\n', stderr);
}
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
/* NBC_GET_BYTES(ptr, x): copy sizeof(x) bytes from ptr into x, then advance
 * ptr by sizeof(x).
 * NBC_PUT_BYTES(ptr, x): copy sizeof(x) bytes from x to ptr, then advance
 * ptr by sizeof(x).
 *
 * ptr must be a modifiable byte pointer and x an lvalue.  memcpy is used so
 * that ptr does not have to be suitably aligned for x.  The arguments are
 * parenthesized so that an expression argument cannot re-associate with the
 * surrounding operators. */
#define NBC_GET_BYTES(ptr,x) {memcpy(&(x),(ptr),sizeof(x)); (ptr) += sizeof(x);}
#define NBC_PUT_BYTES(ptr,x) {memcpy((ptr),&(x),sizeof(x)); (ptr) += sizeof(x);}
303
304
305
306
307
308 static inline void nbc_get_round_size (char *p, unsigned long *size) {
309 NBC_Fn_type type;
310 unsigned long offset = 0;
311 int num;
312
313 NBC_GET_BYTES(p,num);
314
315 for (int i = 0 ; i < num ; ++i) {
316 memcpy (&type, p + offset, sizeof (type));
317 switch(type) {
318 case SEND:
319
320 offset += sizeof(NBC_Args_send);
321 break;
322 case RECV:
323
324 offset += sizeof(NBC_Args_recv);
325 break;
326 case OP:
327
328 offset += sizeof(NBC_Args_op); \
329 break;
330 case COPY:
331
332 offset += sizeof(NBC_Args_copy);
333 break;
334 case UNPACK:
335
336 offset += sizeof(NBC_Args_unpack);
337 break;
338 default:
339 NBC_Error("NBC_GET_ROUND_SIZE: bad type %i at offset %li", type, offset);
340 return;
341 }
342 }
343
344 *size = offset + sizeof (int);
345 }
346
347
348
349 static inline int nbc_schedule_get_size (NBC_Schedule *schedule) {
350 return schedule->size;
351 }
352
353
354 static inline void nbc_schedule_inc_size (NBC_Schedule *schedule, int size) {
355 schedule->size += size;
356 }
357
358
359 static inline void nbc_schedule_inc_round (NBC_Schedule *schedule) {
360 int last_round_num;
361 char *lastround;
362
363 lastround = schedule->data + schedule->current_round_offset;
364
365
366
367 memcpy (&last_round_num, lastround, sizeof (last_round_num));
368 ++last_round_num;
369 memcpy (lastround, &last_round_num, sizeof (last_round_num));
370 }
371
372
373 static inline int nbc_get_noop_request(bool persistent, ompi_request_t **request) {
374 if (persistent) {
375 return ompi_request_persistent_noop_create(request);
376 } else {
377 *request = &ompi_request_empty;
378 return OMPI_SUCCESS;
379 }
380 }
381
382
383
384
/* Debug helper: print every entry of the round that starts at `schedule`.
 *
 * `schedule` points at the round's leading int (the entry count).  Each entry
 * is an NBC_Args_* record whose first member is its NBC_Fn_type tag, so the
 * tag is only peeked at and the whole record (tag included) is then read from
 * the same position.  Reading the tag with NBC_GET_BYTES first would advance
 * the cursor and decode every record sizeof(type) bytes too late.
 *
 * Printed offsets are those of the start of the entry, relative to
 * `schedule`.  On an unknown tag the macro executes
 * `return NBC_BAD_SCHED;` in the enclosing function, so it may only be used
 * inside functions returning int. */
#define NBC_PRINT_ROUND(schedule) \
  { \
    int myrank, i, num; \
    char *p = (char*) (schedule); \
    NBC_Fn_type type; \
    NBC_Args_send sendargs; \
    NBC_Args_recv recvargs; \
    NBC_Args_op opargs; \
    NBC_Args_copy copyargs; \
    NBC_Args_unpack unpackargs; \
    \
    NBC_GET_BYTES(p,num); \
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank); \
    printf("[%i] has %i actions: \n", myrank, num); \
    for (i=0; i<num; i++) { \
      memcpy(&type, p, sizeof(type)); \
      switch(type) { \
        case SEND: \
          printf("[%i] SEND (offset %li) ", myrank, (long)(p-(char*)(schedule))); \
          NBC_GET_BYTES(p,sendargs); \
          printf("*buf: %lu, count: %i, type: %lu, dest: %i)\n", (unsigned long)sendargs.buf, sendargs.count, (unsigned long)sendargs.datatype, sendargs.dest); \
          break; \
        case RECV: \
          printf("[%i] RECV (offset %li) ", myrank, (long)(p-(char*)(schedule))); \
          NBC_GET_BYTES(p,recvargs); \
          printf("*buf: %lu, count: %i, type: %lu, source: %i)\n", (unsigned long)recvargs.buf, recvargs.count, (unsigned long)recvargs.datatype, recvargs.source); \
          break; \
        case OP: \
          printf("[%i] OP (offset %li) ", myrank, (long)(p-(char*)(schedule))); \
          NBC_GET_BYTES(p,opargs); \
          printf("*buf1: %lu, buf2: %lu, count: %i, type: %lu)\n", (unsigned long)opargs.buf1, (unsigned long)opargs.buf2, opargs.count, (unsigned long)opargs.datatype); \
          break; \
        case COPY: \
          printf("[%i] COPY (offset %li) ", myrank, (long)(p-(char*)(schedule))); \
          NBC_GET_BYTES(p,copyargs); \
          printf("*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu, tgtcount: %i, tgttype: %lu)\n", (unsigned long)copyargs.src, copyargs.srccount, (unsigned long)copyargs.srctype, (unsigned long)copyargs.tgt, copyargs.tgtcount, (unsigned long)copyargs.tgttype); \
          break; \
        case UNPACK: \
          printf("[%i] UNPACK (offset %li) ", myrank, (long)(p-(char*)(schedule))); \
          NBC_GET_BYTES(p,unpackargs); \
          printf("*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu\n",(unsigned long)unpackargs.inbuf, unpackargs.count, (unsigned long)unpackargs.datatype, (unsigned long)unpackargs.outbuf); \
          break; \
        default: \
          printf("[%i] NBC_PRINT_ROUND: bad type %i at offset %li\n", myrank, (int)type, (long)(p-(char*)(schedule))); \
          return NBC_BAD_SCHED; \
      } \
    } \
    printf("\n"); \
  }
434
/* Debug helper: print a whole schedule, one round at a time.
 *
 * Starts sizeof(int) bytes into `schedule` and, while the cursor is less than
 * the schedule size away from `schedule`, prints the round under the cursor
 * with NBC_PRINT_ROUND, then advances by the round size plus one extra byte.
 *
 * NOTE(review): relies on NBC_GET_SIZE and NBC_GET_ROUND_SIZE, which are not
 * defined in the visible part of this header - confirm they still exist
 * before using this macro. */
#define NBC_PRINT_SCHED(schedule) \
  { \
    int sched_bytes, sched_rank; \
    long round_bytes; \
    char *sched_cursor; \
    \
    NBC_GET_SIZE(schedule, sched_bytes); \
    MPI_Comm_rank(MPI_COMM_WORLD, &sched_rank); \
    printf("[%i] printing schedule of size %i\n", sched_rank, sched_bytes); \
    \
    for (sched_cursor = (char*)schedule+sizeof(int); \
         (long)sched_cursor-(long)schedule < sched_bytes; \
         sched_cursor += round_bytes, sched_cursor += sizeof(char)) { \
      NBC_GET_ROUND_SIZE(sched_cursor, round_bytes); \
      printf("[%i] Round at byte %li (size %li) ", sched_rank, (long)sched_cursor-(long)schedule, round_bytes); \
      NBC_PRINT_ROUND(sched_cursor); \
    } \
  }
455
456
457
458
459
/* Print a printf-style debug message to stdout, prefixed with
 * "[LibNBC - <rank>] " where <rank> is the caller's rank in MPI_COMM_WORLD.
 *
 * The message is printed only when NBC_DLEVEL is greater than zero and at
 * least `level`; otherwise the function does nothing. */
static inline void NBC_DEBUG(int level, const char *fmt, ...)
{
#if NBC_DLEVEL > 0
  int my_rank;
  va_list args;

  if (NBC_DLEVEL < level) {
    return;
  }

  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  printf("[LibNBC - %i] ", my_rank);

  va_start(args, fmt);
  vprintf(fmt, args);
  va_end(args);
#endif
}
476
477
478 static inline int NBC_Type_intrinsic(MPI_Datatype type) {
479
480 if( ( type == MPI_INT ) ||
481 ( type == MPI_LONG ) ||
482 ( type == MPI_SHORT ) ||
483 ( type == MPI_UNSIGNED ) ||
484 ( type == MPI_UNSIGNED_SHORT ) ||
485 ( type == MPI_UNSIGNED_LONG ) ||
486 ( type == MPI_FLOAT ) ||
487 ( type == MPI_DOUBLE ) ||
488 ( type == MPI_LONG_DOUBLE ) ||
489 ( type == MPI_BYTE ) ||
490 ( type == MPI_FLOAT_INT) ||
491 ( type == MPI_DOUBLE_INT) ||
492 ( type == MPI_LONG_INT) ||
493 ( type == MPI_2INT) ||
494 ( type == MPI_SHORT_INT) ||
495 ( type == MPI_LONG_DOUBLE_INT))
496 return 1;
497 else
498 return 0;
499 }
500
501
502 static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) {
503 int res;
504
505 res = ompi_datatype_sndrcv(src, srccount, srctype, tgt, tgtcount, tgttype);
506 if (OMPI_SUCCESS != res) {
507 NBC_Error ("MPI Error in ompi_datatype_sndrcv() (%i)", res);
508 return res;
509 }
510
511 return OMPI_SUCCESS;
512 }
513
514 static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) {
515 MPI_Aint size, pos;
516 int res;
517 ptrdiff_t ext, lb;
518
519 res = ompi_datatype_pack_external_size("external32", srccount, srctype, &size);
520 if (OMPI_SUCCESS != res) {
521 NBC_Error ("MPI Error in ompi_datatype_pack_external_size() (%i)", res);
522 return res;
523 }
524 #if OPAL_CUDA_SUPPORT
525 if(NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
526 #else
527 if(NBC_Type_intrinsic(srctype)) {
528 #endif
529
530
531 res = ompi_datatype_get_extent (srctype, &lb, &ext);
532 if (OMPI_SUCCESS != res) {
533 NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res);
534 return res;
535 }
536
537 memcpy(tgt, src, srccount * ext);
538
539 } else {
540
541 pos = 0;
542 res = ompi_datatype_unpack_external("external32", src, size, &pos, tgt, srccount, srctype);
543 if (MPI_SUCCESS != res) {
544 NBC_Error ("MPI Error in ompi_datatype_unpack_external() (%i)", res);
545 return res;
546 }
547 }
548
549 return OMPI_SUCCESS;
550 }
551
552
553 static inline void NBC_SchedCache_dictwipe(hb_tree *dict, int *size) {
554 hb_itor *itor;
555
556 itor = hb_itor_new(dict);
557 for (; hb_itor_valid(itor) && (*size>NBC_SCHED_DICT_LOWER); hb_itor_next(itor)) {
558 hb_tree_remove(dict, hb_itor_key(itor), 0);
559 *size = *size-1;
560 }
561 hb_itor_destroy(itor);
562 }
563
/* Normalise the send/receive buffer pair of a collective call.
 *
 * inplace is set to 1 when both buffers are the same pointer, or when one of
 * them is MPI_IN_PLACE; in the latter case the MPI_IN_PLACE buffer is
 * replaced by the other one.  Otherwise inplace is set to 0 and both buffers
 * are left alone.  sendbuf, recvbuf and inplace must be assignable. */
#define NBC_IN_PLACE(sendbuf, recvbuf, inplace) \
{ \
  if(recvbuf == sendbuf) { \
    inplace = 1; \
  } else if(sendbuf == MPI_IN_PLACE) { \
    sendbuf = recvbuf; \
    inplace = 1; \
  } else if(recvbuf == MPI_IN_PLACE) { \
    recvbuf = (void *)sendbuf; \
    inplace = 1; \
  } else { \
    inplace = 0; \
  } \
}
579
580 int NBC_Comm_neighbors_count (ompi_communicator_t *comm, int *indegree, int *outdegree);
581 int NBC_Comm_neighbors (ompi_communicator_t *comm, int **sources, int *source_count, int **destinations, int *dest_count);
582
583 #ifdef __cplusplus
584 }
585 #endif
586
587 #endif
588
589