This source file includes the following definitions.
- NBC_Error
- nbc_get_round_size
- nbc_schedule_get_size
- nbc_schedule_inc_size
- nbc_schedule_inc_round
- nbc_get_noop_request
- NBC_DEBUG
- NBC_Type_intrinsic
- NBC_Copy
- NBC_Unpack
- NBC_SchedCache_dictwipe
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 #ifndef __NBC_INTERNAL_H__
  23 #define __NBC_INTERNAL_H__
  24 #include "ompi_config.h"
  25 
  26 
  27 #define NBC_F77_FUNC_ F77_FUNC_
  28 
  29 #include "mpi.h"
  30 
  31 #include "coll_libnbc.h"
  32 #if OPAL_CUDA_SUPPORT
  33 #include "opal/datatype/opal_convertor.h"
  34 #include "opal/datatype/opal_datatype_cuda.h"
  35 #endif 
  36 #include "ompi/include/ompi/constants.h"
  37 #include "ompi/request/request.h"
  38 #include "ompi/datatype/ompi_datatype.h"
  39 #include "ompi/communicator/communicator.h"
  40 
  41 #include <stdlib.h>
  42 #include <stdio.h>
  43 #include <stdarg.h>
  44 #include <assert.h>
  45 #include <math.h>
  46 #include <string.h>
  47 #include "libdict/dict.h"
  48 
  49 #ifdef __cplusplus
  50 extern "C" {
  51 #endif
  52 
  53 
  54 #define LOG2 0.69314718055994530941
  55 
  56 
  57 #define true 1
  58 #define false 0
  59 
  60 
  61 #define NBC_ALLGATHER 0
  62 #define NBC_ALLGATHERV 1
  63 #define NBC_ALLREDUCE 2
  64 #define NBC_ALLTOALL 3
  65 #define NBC_ALLTOALLV 4
  66 #define NBC_ALLTOALLW 5
  67 #define NBC_BARRIER 6
  68 #define NBC_BCAST 7
  69 #define NBC_EXSCAN 8
  70 #define NBC_GATHER 9
  71 #define NBC_GATHERV 10
  72 #define NBC_REDUCE 11
  73 #define NBC_REDUCESCAT 12
  74 #define NBC_SCAN 13
  75 #define NBC_SCATTER 14
  76 #define NBC_SCATTERV 15
  77 
  78 
  79 
  80 
  81 
  82 typedef enum {
  83   SEND,
  84   RECV,
  85   OP,
  86   COPY,
  87   UNPACK
  88 } NBC_Fn_type;
  89 
  90 
  91 typedef struct {
  92   NBC_Fn_type type;
  93   int count;
  94   const void *buf;
  95   MPI_Datatype datatype;
  96   int dest;
  97   char tmpbuf;
  98   bool local;
  99 } NBC_Args_send;
 100 
 101 
 102 typedef struct {
 103   NBC_Fn_type type;
 104   int count;
 105   void *buf;
 106   MPI_Datatype datatype;
 107   char tmpbuf;
 108   int source;
 109   bool local;
 110 } NBC_Args_recv;
 111 
 112 
 113 typedef struct {
 114   NBC_Fn_type type;
 115   char tmpbuf1;
 116   char tmpbuf2;
 117   const void *buf1;
 118   void *buf2;
 119   MPI_Op op;
 120   MPI_Datatype datatype;
 121   int count;
 122 } NBC_Args_op;
 123 
 124 
 125 typedef struct {
 126   NBC_Fn_type type;
 127   int srccount;
 128   void *src;
 129   void *tgt;
 130   MPI_Datatype srctype;
 131   MPI_Datatype tgttype;
 132   int tgtcount;
 133   char tmpsrc;
 134   char tmptgt;
 135 } NBC_Args_copy;
 136 
 137 
 138 typedef struct {
 139   NBC_Fn_type type;
 140   int count;
 141   void *inbuf;
 142   void *outbuf;
 143   MPI_Datatype datatype;
 144   char tmpinbuf;
 145   char tmpoutbuf;
 146 } NBC_Args_unpack;
 147 
 148 
 149 int NBC_Sched_send (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule, bool barrier);
 150 int NBC_Sched_local_send (const void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest,NBC_Schedule *schedule, bool barrier);
 151 int NBC_Sched_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier);
 152 int NBC_Sched_local_recv (void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule, bool barrier);
 153 int NBC_Sched_op (const void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype,
 154                   MPI_Op op, NBC_Schedule *schedule, bool barrier);
 155 int NBC_Sched_copy (void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount,
 156                     MPI_Datatype tgttype, NBC_Schedule *schedule, bool barrier);
 157 int NBC_Sched_unpack (void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf,
 158                       NBC_Schedule *schedule, bool barrier);
 159 
 160 int NBC_Sched_barrier (NBC_Schedule *schedule);
 161 int NBC_Sched_commit (NBC_Schedule *schedule);
 162 
 163 #ifdef NBC_CACHE_SCHEDULE
 164 
 165 
 166 
 167 
 168 struct NBC_dummyarg {
 169   NBC_Schedule *schedule;
 170 };
 171 
 172 typedef struct {
 173   NBC_Schedule *schedule;
 174   void *sendbuf;
 175   int sendcount;
 176   MPI_Datatype sendtype;
 177   void* recvbuf;
 178   int recvcount;
 179   MPI_Datatype recvtype;
 180 } NBC_Alltoall_args;
 181 int NBC_Alltoall_args_compare(NBC_Alltoall_args *a, NBC_Alltoall_args *b, void *param);
 182 
 183 typedef struct {
 184   NBC_Schedule *schedule;
 185   void *sendbuf;
 186   int sendcount;
 187   MPI_Datatype sendtype;
 188   void* recvbuf;
 189   int recvcount;
 190   MPI_Datatype recvtype;
 191 } NBC_Allgather_args;
 192 int NBC_Allgather_args_compare(NBC_Allgather_args *a, NBC_Allgather_args *b, void *param);
 193 
 194 typedef struct {
 195   NBC_Schedule *schedule;
 196   void *sendbuf;
 197   void* recvbuf;
 198   int count;
 199   MPI_Datatype datatype;
 200   MPI_Op op;
 201 } NBC_Allreduce_args;
 202 int NBC_Allreduce_args_compare(NBC_Allreduce_args *a, NBC_Allreduce_args *b, void *param);
 203 
 204 typedef struct {
 205   NBC_Schedule *schedule;
 206   void *buffer;
 207   int count;
 208   MPI_Datatype datatype;
 209   int root;
 210 } NBC_Bcast_args;
 211 int NBC_Bcast_args_compare(NBC_Bcast_args *a, NBC_Bcast_args *b, void *param);
 212 
 213 typedef struct {
 214   NBC_Schedule *schedule;
 215   void *sendbuf;
 216   int sendcount;
 217   MPI_Datatype sendtype;
 218   void* recvbuf;
 219   int recvcount;
 220   MPI_Datatype recvtype;
 221   int root;
 222 } NBC_Gather_args;
 223 int NBC_Gather_args_compare(NBC_Gather_args *a, NBC_Gather_args *b, void *param);
 224 
 225 typedef struct {
 226   NBC_Schedule *schedule;
 227   void *sendbuf;
 228   void* recvbuf;
 229   int count;
 230   MPI_Datatype datatype;
 231   MPI_Op op;
 232   int root;
 233 } NBC_Reduce_args;
 234 int NBC_Reduce_args_compare(NBC_Reduce_args *a, NBC_Reduce_args *b, void *param);
 235 
 236 typedef struct {
 237   NBC_Schedule *schedule;
 238   void *sendbuf;
 239   void* recvbuf;
 240   int count;
 241   MPI_Datatype datatype;
 242   MPI_Op op;
 243 } NBC_Scan_args;
 244 int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param);
 245 
 246 typedef struct {
 247   NBC_Schedule *schedule;
 248   void *sendbuf;
 249   int sendcount;
 250   MPI_Datatype sendtype;
 251   void* recvbuf;
 252   int recvcount;
 253   MPI_Datatype recvtype;
 254   int root;
 255 } NBC_Scatter_args;
 256 int NBC_Scatter_args_compare(NBC_Scatter_args *a, NBC_Scatter_args *b, void *param);
 257 
 258 
 259 void NBC_SchedCache_args_delete(void *entry);
 260 void NBC_SchedCache_args_delete_key_dummy(void *k);
 261 
 262 #endif
 263 
 264 
 265 int NBC_Start(NBC_Handle *handle);
 266 int NBC_Schedule_request(NBC_Schedule *schedule, ompi_communicator_t *comm,
 267                          ompi_coll_libnbc_module_t *module, bool persistent,
 268                          ompi_request_t **request, void *tmpbuf);
 269 void NBC_Return_handle(ompi_coll_libnbc_request_t *request);
 270 static inline int NBC_Type_intrinsic(MPI_Datatype type);
 271 int NBC_Create_fortran_handle(int *fhandle, NBC_Handle **handle);
 272 
 273 
 274 
 275 static inline void NBC_Error (char *format, ...) {
 276   va_list args;
 277 
 278   va_start (args, format);
 279   vfprintf (stderr, format, args);
 280   fprintf (stderr, "\n");
 281   va_end (args);
 282 }
 283 
 284 
 285 
 286 
 287 
 288 
 289 
 290 
 291 
 292 
 293 
 294 
 295 
 296 
 297 
 298 
 299 
 300 
 301 #define NBC_GET_BYTES(ptr,x) {memcpy(&x,ptr,sizeof(x)); ptr += sizeof(x);}
 302 #define NBC_PUT_BYTES(ptr,x) {memcpy(ptr,&x,sizeof(x)); ptr += sizeof(x);}
 303 
 304 
 305 
 306 
 307 
 308 static inline void nbc_get_round_size (char *p, unsigned long *size) {
 309   NBC_Fn_type type;
 310   unsigned long offset = 0;
 311   int num;
 312 
 313   NBC_GET_BYTES(p,num);
 314   
 315   for (int i = 0 ; i < num ; ++i) {
 316     memcpy (&type, p + offset, sizeof (type));
 317     switch(type) {
 318     case SEND:
 319       
 320       offset += sizeof(NBC_Args_send);
 321       break;
 322     case RECV:
 323       
 324       offset += sizeof(NBC_Args_recv);
 325       break;
 326     case OP:
 327       
 328       offset += sizeof(NBC_Args_op);            \
 329       break;
 330     case COPY:
 331       
 332       offset += sizeof(NBC_Args_copy);
 333       break;
 334     case UNPACK:
 335       
 336       offset += sizeof(NBC_Args_unpack);
 337       break;
 338     default:
 339       NBC_Error("NBC_GET_ROUND_SIZE: bad type %i at offset %li", type, offset);
 340       return;
 341     }
 342   }
 343 
 344   *size = offset + sizeof (int);
 345 }
 346 
 347 
 348 
 349 static inline int nbc_schedule_get_size (NBC_Schedule *schedule) {
 350   return schedule->size;
 351 }
 352 
 353 
 354 static inline void nbc_schedule_inc_size (NBC_Schedule *schedule, int size) {
 355   schedule->size += size;
 356 }
 357 
 358 
 359 static inline void nbc_schedule_inc_round (NBC_Schedule *schedule) {
 360   int last_round_num;
 361   char *lastround;
 362 
 363   lastround = schedule->data + schedule->current_round_offset;
 364 
 365   
 366 
 367   memcpy (&last_round_num, lastround, sizeof (last_round_num));
 368   ++last_round_num;
 369   memcpy (lastround, &last_round_num, sizeof (last_round_num));
 370 }
 371 
 372 
 373 static inline int nbc_get_noop_request(bool persistent, ompi_request_t **request) {
 374   if (persistent) {
 375     return ompi_request_persistent_noop_create(request);
 376   } else {
 377     *request = &ompi_request_empty;
 378     return OMPI_SUCCESS;
 379   }
 380 }
 381 
 382 
 383 
 384 
 385 #define NBC_PRINT_ROUND(schedule) \
 386  {  \
 387    int myrank, i, num; \
 388    char *p = (char*) schedule; \
 389    NBC_Fn_type type; \
 390    NBC_Args_send     sendargs; \
 391    NBC_Args_recv     recvargs; \
 392    NBC_Args_op         opargs; \
 393    NBC_Args_copy     copyargs; \
 394    NBC_Args_unpack unpackargs; \
 395      \
 396    NBC_GET_BYTES(p,num); \
 397    MPI_Comm_rank(MPI_COMM_WORLD, &myrank); \
 398    printf("[%i] has %i actions: \n", myrank, num); \
 399    for (i=0; i<num; i++) { \
 400      NBC_GET_BYTES(p,type); \
 401      switch(type) { \
 402        case SEND: \
 403          printf("[%i]  SEND (offset %li) ", myrank, (long)p-(long)schedule); \
 404          NBC_GET_BYTES(p,sendargs); \
 405          printf("*buf: %lu, count: %i, type: %lu, dest: %i)\n", (unsigned long)sendargs.buf, sendargs.count, (unsigned long)sendargs.datatype, sendargs.dest); \
 406          break; \
 407        case RECV: \
 408          printf("[%i]  RECV (offset %li) ", myrank, (long)p-(long)schedule); \
 409          NBC_GET_BYTES(p,recvargs); \
 410          printf("*buf: %lu, count: %i, type: %lu, source: %i)\n", (unsigned long)recvargs.buf, recvargs.count, (unsigned long)recvargs.datatype, recvargs.source); \
 411          break; \
 412        case OP: \
 413          printf("[%i]  OP   (offset %li) ", myrank, (long)p-(long)schedule); \
 414          NBC_GET_BYTES(p,opargs); \
 415          printf("*buf1: %lu, buf2: %lu, count: %i, type: %lu)\n", (unsigned long)opargs.buf1, (unsigned long)opargs.buf2, opargs.count, (unsigned long)opargs.datatype); \
 416          break; \
 417        case COPY: \
 418          printf("[%i]  COPY   (offset %li) ", myrank, (long)p-(long)schedule); \
 419          NBC_GET_BYTES(p,copyargs); \
 420          printf("*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu, tgtcount: %i, tgttype: %lu)\n", (unsigned long)copyargs.src, copyargs.srccount, (unsigned long)copyargs.srctype, (unsigned long)copyargs.tgt, copyargs.tgtcount, (unsigned long)copyargs.tgttype); \
 421          break; \
 422        case UNPACK: \
 423          printf("[%i]  UNPACK   (offset %li) ", myrank, (long)p-(long)schedule); \
 424          NBC_GET_BYTES(p,unpackargs); \
 425          printf("*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu\n",(unsigned long)unpackargs.inbuf, unpackargs.count, (unsigned long)unpackargs.datatype, (unsigned long)unpackargs.outbuf); \
 426          break; \
 427        default: \
 428          printf("[%i] NBC_PRINT_ROUND: bad type %i at offset %li\n", myrank, type, (long)p-sizeof(type)-(long)schedule); \
 429          return NBC_BAD_SCHED; \
 430      } \
 431    } \
 432    printf("\n"); \
 433  }
 434 
 435 #define NBC_PRINT_SCHED(schedule) \
 436 { \
 437   int size, myrank; \
 438   long round_size; \
 439   char *ptr; \
 440  \
 441   NBC_GET_SIZE(schedule, size); \
 442   MPI_Comm_rank(MPI_COMM_WORLD, &myrank); \
 443   printf("[%i] printing schedule of size %i\n", myrank, size); \
 444  \
 445    \
 446   ptr = (char*)schedule+sizeof(int); \
 447   while ((long)ptr-(long)schedule < size) { \
 448     NBC_GET_ROUND_SIZE(ptr, round_size); \
 449     printf("[%i] Round at byte %li (size %li) ", myrank, (long)ptr-(long)schedule, round_size); \
 450     NBC_PRINT_ROUND(ptr); \
 451     ptr += round_size; \
 452     ptr += sizeof(char);  \
 453   } \
 454 }
 455 
 456 
 457 
 458 
 459 
 460 static inline void NBC_DEBUG(int level, const char *fmt, ...)
 461 {
 462 #if NBC_DLEVEL > 0
 463   va_list ap;
 464   int rank;
 465 
 466   if(NBC_DLEVEL >= level) {
 467     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 468 
 469     printf("[LibNBC - %i] ", rank);
 470     va_start(ap, fmt);
 471     vprintf(fmt, ap);
 472     va_end (ap);
 473   }
 474 #endif
 475 }
 476 
 477 
 478 static inline int NBC_Type_intrinsic(MPI_Datatype type) {
 479 
 480   if( ( type == MPI_INT ) ||
 481       ( type == MPI_LONG ) ||
 482       ( type == MPI_SHORT ) ||
 483       ( type == MPI_UNSIGNED ) ||
 484       ( type == MPI_UNSIGNED_SHORT ) ||
 485       ( type == MPI_UNSIGNED_LONG ) ||
 486       ( type == MPI_FLOAT ) ||
 487       ( type == MPI_DOUBLE ) ||
 488       ( type == MPI_LONG_DOUBLE ) ||
 489       ( type == MPI_BYTE ) ||
 490       ( type == MPI_FLOAT_INT) ||
 491       ( type == MPI_DOUBLE_INT) ||
 492       ( type == MPI_LONG_INT) ||
 493       ( type == MPI_2INT) ||
 494       ( type == MPI_SHORT_INT) ||
 495       ( type == MPI_LONG_DOUBLE_INT))
 496     return 1;
 497   else
 498     return 0;
 499 }
 500 
 501 
 502 static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) {
 503   int res;
 504 
 505   res = ompi_datatype_sndrcv(src, srccount, srctype, tgt, tgtcount, tgttype);
 506   if (OMPI_SUCCESS != res) {
 507     NBC_Error ("MPI Error in ompi_datatype_sndrcv() (%i)", res);
 508     return res;
 509   }
 510 
 511   return OMPI_SUCCESS;
 512 }
 513 
 514 static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) {
 515   MPI_Aint size, pos;
 516   int res;
 517   ptrdiff_t ext, lb;
 518 
 519   res = ompi_datatype_pack_external_size("external32", srccount, srctype, &size);
 520   if (OMPI_SUCCESS != res) {
 521     NBC_Error ("MPI Error in ompi_datatype_pack_external_size() (%i)", res);
 522     return res;
 523   }
 524 #if OPAL_CUDA_SUPPORT
 525   if(NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
 526 #else
 527   if(NBC_Type_intrinsic(srctype)) {
 528 #endif 
 529     
 530 
 531     res = ompi_datatype_get_extent (srctype, &lb, &ext);
 532     if (OMPI_SUCCESS != res) {
 533       NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res);
 534       return res;
 535     }
 536 
 537     memcpy(tgt, src, srccount * ext);
 538 
 539   } else {
 540     
 541     pos = 0;
 542     res = ompi_datatype_unpack_external("external32", src, size, &pos, tgt, srccount, srctype);
 543     if (MPI_SUCCESS != res) {
 544       NBC_Error ("MPI Error in ompi_datatype_unpack_external() (%i)", res);
 545       return res;
 546     }
 547   }
 548 
 549   return OMPI_SUCCESS;
 550 }
 551 
 552 
 553 static inline void NBC_SchedCache_dictwipe(hb_tree *dict, int *size) {
 554   hb_itor *itor;
 555 
 556   itor = hb_itor_new(dict);
 557   for (; hb_itor_valid(itor) && (*size>NBC_SCHED_DICT_LOWER); hb_itor_next(itor)) {
 558     hb_tree_remove(dict, hb_itor_key(itor), 0);
 559     *size = *size-1;
 560   }
 561   hb_itor_destroy(itor);
 562 }
 563 
 564 #define NBC_IN_PLACE(sendbuf, recvbuf, inplace) \
 565 { \
 566   inplace = 0; \
 567   if(recvbuf == sendbuf) { \
 568     inplace = 1; \
 569   } else \
 570   if(sendbuf == MPI_IN_PLACE) { \
 571     sendbuf = recvbuf; \
 572     inplace = 1; \
 573   } else \
 574   if(recvbuf == MPI_IN_PLACE) { \
 575     recvbuf = (void *)sendbuf; \
 576     inplace = 1; \
 577   } \
 578 }
 579 
 580 int NBC_Comm_neighbors_count (ompi_communicator_t *comm, int *indegree, int *outdegree);
 581 int NBC_Comm_neighbors (ompi_communicator_t *comm, int **sources, int *source_count, int **destinations, int *dest_count);
 582 
 583 #ifdef __cplusplus
 584 }
 585 #endif
 586 
 587 #endif
 588 
 589