This source file includes the following definitions.
- mca_scoll_basic_reduce
- _algorithm_central_counter
- _algorithm_tournament
- _algorithm_recursive_doubling
- _algorithm_linear
- _algorithm_log
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 #include "oshmem_config.h"
  12 #include <stdio.h>
  13 #include <stdlib.h>
  14 
  15 #include "opal/util/bit_ops.h"
  16 
  17 #include "oshmem/constants.h"
  18 #include "oshmem/op/op.h"
  19 #include "oshmem/mca/spml/spml.h"
  20 #include "oshmem/mca/scoll/scoll.h"
  21 #include "oshmem/mca/scoll/base/base.h"
  22 #include "scoll_basic.h"
  23 
  24 static int _algorithm_central_counter(struct oshmem_group_t *group,
  25                                        struct oshmem_op_t *op,
  26                                        void *target,
  27                                        const void *source,
  28                                        size_t nlong,
  29                                        long *pSync,
  30                                        void *pWrk);
  31 static int _algorithm_tournament(struct oshmem_group_t *group,
  32                                   struct oshmem_op_t *op,
  33                                   void *target,
  34                                   const void *source,
  35                                   size_t nlong,
  36                                   long *pSync,
  37                                   void *pWrk);
  38 static int _algorithm_recursive_doubling(struct oshmem_group_t *group,
  39                                           struct oshmem_op_t *op,
  40                                           void *target,
  41                                           const void *source,
  42                                           size_t nlong,
  43                                           long *pSync,
  44                                           void *pWrk);
  45 static int _algorithm_linear(struct oshmem_group_t *group,
  46                               struct oshmem_op_t *op,
  47                               void *target,
  48                               const void *source,
  49                               size_t nlong,
  50                               long *pSync,
  51                               void *pWrk);
  52 static int _algorithm_log(struct oshmem_group_t *group,
  53                            struct oshmem_op_t *op,
  54                            void *target,
  55                            const void *source,
  56                            size_t nlong,
  57                            long *pSync,
  58                            void *pWrk);
  59 
  60 int mca_scoll_basic_reduce(struct oshmem_group_t *group,
  61                            struct oshmem_op_t *op,
  62                            void *target,
  63                            const void *source,
  64                            size_t nlong,
  65                            long *pSync,
  66                            void *pWrk,
  67                            int alg)
  68 {
  69     int rc = OSHMEM_SUCCESS;
  70 
  71     
  72     if (!group) {
  73         SCOLL_ERROR("Active set (group) of PE is not defined");
  74         rc = OSHMEM_ERR_BAD_PARAM;
  75     }
  76 
  77     
  78     if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
  79         int i = 0;
  80 
  81         
  82         if (OPAL_UNLIKELY(!nlong)) {
  83             return OSHMEM_SUCCESS;
  84         }
  85 
  86         if (pSync) {
  87             alg = (alg == SCOLL_DEFAULT_ALG ?
  88                     mca_scoll_basic_param_reduce_algorithm : alg);
  89             switch (alg) {
  90             case SCOLL_ALG_REDUCE_CENTRAL_COUNTER:
  91                 {
  92                     rc = _algorithm_central_counter(group,
  93                                                      op,
  94                                                      target,
  95                                                      source,
  96                                                      nlong,
  97                                                      pSync,
  98                                                      pWrk);
  99                     break;
 100                 }
 101             case SCOLL_ALG_REDUCE_TOURNAMENT:
 102                 {
 103                     rc = _algorithm_tournament(group,
 104                                                 op,
 105                                                 target,
 106                                                 source,
 107                                                 nlong,
 108                                                 pSync,
 109                                                 pWrk);
 110                     break;
 111                 }
 112             case SCOLL_ALG_REDUCE_RECURSIVE_DOUBLING:
 113                 {
 114                     rc = _algorithm_recursive_doubling(group,
 115                                                         op,
 116                                                         target,
 117                                                         source,
 118                                                         nlong,
 119                                                         pSync,
 120                                                         pWrk);
 121                     break;
 122                 }
 123             case SCOLL_ALG_REDUCE_LEGACY_LINEAR:
 124                 {
 125                     rc = _algorithm_linear(group,
 126                                             op,
 127                                             target,
 128                                             source,
 129                                             nlong,
 130                                             pSync,
 131                                             pWrk);
 132                     break;
 133                 }
 134             case SCOLL_ALG_REDUCE_LEGACY_LOG:
 135                 {
 136                     rc = _algorithm_log(group,
 137                                          op,
 138                                          target,
 139                                          source,
 140                                          nlong,
 141                                          pSync,
 142                                          pWrk);
 143                     break;
 144                 }
 145             default:
 146                 {
 147                     rc = _algorithm_central_counter(group,
 148                                                      op,
 149                                                      target,
 150                                                      source,
 151                                                      nlong,
 152                                                      pSync,
 153                                                      pWrk);
 154                 }
 155             }
 156         } else {
 157             SCOLL_ERROR("Incorrect argument pSync");
 158             rc = OSHMEM_ERR_BAD_PARAM;
 159         }
 160 
 161         
 162         SCOLL_VERBOSE(12,
 163                       "PE#%d Restore special synchronization array",
 164                       group->my_pe);
 165         for (i = 0; pSync && (i < _SHMEM_REDUCE_SYNC_SIZE); i++) {
 166             pSync[i] = _SHMEM_SYNC_VALUE;
 167         }
 168     }
 169 
 170     return rc;
 171 }
 172 
 173 
 174 
 175 
 176 
 177 
 178 
 179 static int _algorithm_central_counter(struct oshmem_group_t *group,
 180                                        struct oshmem_op_t *op,
 181                                        void *target,
 182                                        const void *source,
 183                                        size_t nlong,
 184                                        long *pSync,
 185                                        void *pWrk)
 186 {
 187     int rc = OSHMEM_SUCCESS;
 188     int i = 0;
 189     int PE_root = oshmem_proc_pe(group->proc_array[0]);
 190 
 191     SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Central Counter", group->my_pe);
 192 
 193     if (PE_root == group->my_pe) {
 194         int pe_cur = 0;
 195         void *target_cur = NULL;
 196 
 197         target_cur = malloc(nlong);
 198         if (target_cur) {
 199             memcpy(target, (void *) source, nlong);
 200 
 201             SCOLL_VERBOSE(14,
 202                           "[#%d] Gather data from all PEs in the group",
 203                           group->my_pe);
 204             for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS);
 205                     i++) {
 206                 
 207                 pe_cur = oshmem_proc_pe(group->proc_array[i]);
 208 
 209                 if (pe_cur == group->my_pe)
 210                     continue;
 211 
 212                 SCOLL_VERBOSE(14,
 213                               "[#%d] Gather data (%d bytes) from #%d",
 214                               group->my_pe, (int)nlong, pe_cur);
 215 
 216                 
 217                 memset(target_cur, 0, nlong);
 218 
 219                 
 220                 rc = MCA_SPML_CALL(get(oshmem_ctx_default, (void *)source, nlong, target_cur, pe_cur));
 221 
 222                 
 223                 if (rc == OSHMEM_SUCCESS) {
 224                     op->o_func.c_fn(target_cur, target, nlong / op->dt_size);
 225                 }
 226             }
 227 
 228             free(target_cur);
 229         } else {
 230             rc = OSHMEM_ERR_OUT_OF_RESOURCE;
 231         }
 232     }
 233 
 234     
 235     if (rc == OSHMEM_SUCCESS) {
 236         SCOLL_VERBOSE(14,
 237                       "[#%d] Broadcast from the root #%d",
 238                       group->my_pe, PE_root);
 239         rc = BCAST_FUNC(group,
 240                 PE_root,
 241                 target,
 242                 target,
 243                 nlong,
 244                 (pSync + 1),
 245                 true,
 246                 SCOLL_DEFAULT_ALG);
 247     }
 248 
 249     return rc;
 250 }
 251 
 252 static int _algorithm_tournament(struct oshmem_group_t *group,
 253                                   struct oshmem_op_t *op,
 254                                   void *target,
 255                                   const void *source,
 256                                   size_t nlong,
 257                                   long *pSync,
 258                                   void *pWrk)
 259 {
 260     int rc = OSHMEM_SUCCESS;
 261     int round = 0;
 262     int exit_flag = group->proc_count - 1;
 263     long value = SHMEM_SYNC_INIT;
 264     int my_id = oshmem_proc_group_find_id(group, group->my_pe);
 265     int peer_id = 0;
 266     int peer_pe = 0;
 267     void *target_cur = NULL;
 268     int PE_root = oshmem_proc_pe(group->proc_array[0]);
 269 
 270     SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Tournament", group->my_pe);
 271     SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
 272 
 273     
 274     pSync[0] = SHMEM_SYNC_WAIT;
 275 
 276     target_cur = malloc(nlong);
 277     if (target_cur) {
 278         memcpy(target_cur, (void *) source, nlong);
 279     } else {
 280         return OSHMEM_ERR_OUT_OF_RESOURCE;
 281     }
 282 
 283     while (exit_flag && (rc == OSHMEM_SUCCESS)) {
 284         
 285         peer_id = my_id ^ (1 << round);
 286 
 287         
 288         exit_flag >>= 1;
 289         round++;
 290 
 291         
 292         if (peer_id >= group->proc_count)
 293             continue;
 294 
 295         if (my_id < peer_id) {
 296             pSync[0] = peer_id;
 297             value = my_id;
 298 
 299             SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
 300             rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
 301 
 302             
 303             if (rc == OSHMEM_SUCCESS) {
 304                 op->o_func.c_fn(target, target_cur, nlong / op->dt_size);
 305             }
 306         } else {
 307             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 308 
 309 #if 1 
 310 
 311 
 312 
 313             do {
 314                 MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 315             } while (value != my_id);
 316 
 317             SCOLL_VERBOSE(14,
 318                           "[#%d] round = %d send data to #%d",
 319                           group->my_pe, round, peer_pe);
 320             rc = MCA_SPML_CALL(put(oshmem_ctx_default, target, nlong, target_cur, peer_pe));
 321 
 322             MCA_SPML_CALL(fence(oshmem_ctx_default));
 323 
 324             SCOLL_VERBOSE(14,
 325                           "[#%d] round = %d signals to #%d",
 326                           group->my_pe, round, peer_pe);
 327             value = peer_id;
 328             rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 329 #endif
 330             SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
 331             value = SHMEM_SYNC_RUN;
 332             rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
 333 
 334             break;
 335         }
 336     }
 337 
 338     
 339     if ((my_id == 0) && (rc == OSHMEM_SUCCESS)) {
 340         SCOLL_VERBOSE(14, "[#%d] signals to all", group->my_pe);
 341 
 342         memcpy(target, target_cur, nlong);
 343 
 344         value = SHMEM_SYNC_RUN;
 345         for (peer_id = 1;
 346                 (peer_id < group->proc_count) && (rc == OSHMEM_SUCCESS);
 347                 peer_id++) {
 348             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 349             rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 350         }
 351     }
 352 
 353     
 354     if (rc == OSHMEM_SUCCESS) {
 355         SCOLL_VERBOSE(14,
 356                       "[#%d] Broadcast from the root #%d",
 357                       group->my_pe, PE_root);
 358         rc = BCAST_FUNC(group,
 359                 PE_root,
 360                 target,
 361                 target,
 362                 nlong,
 363                 (pSync + 1),
 364                 true,
 365                 SCOLL_DEFAULT_ALG);
 366     }
 367 
 368     free(target_cur);
 369 
 370     SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
 371 
 372     return rc;
 373 }
 374 
 375 static int _algorithm_recursive_doubling(struct oshmem_group_t *group,
 376                                           struct oshmem_op_t *op,
 377                                           void *target,
 378                                           const void *source,
 379                                           size_t nlong,
 380                                           long *pSync,
 381                                           void *pWrk)
 382 {
 383     int rc = OSHMEM_SUCCESS;
 384     int round = 0;
 385     int floor2_proc = 0;
 386     int exit_flag = 0;
 387     long value = SHMEM_SYNC_INIT;
 388     void *target_cur = NULL;
 389     int my_id = oshmem_proc_group_find_id(group, group->my_pe);
 390     int peer_id = 0;
 391     int peer_pe = 0;
 392     int i = 0;
 393 
 394     floor2_proc = 1;
 395     i = group->proc_count;
 396     i >>= 1;
 397     while (i) {
 398         i >>= 1;
 399         floor2_proc <<= 1;
 400     }
 401 
 402     target_cur = malloc(nlong);
 403     if (target_cur) {
 404         memcpy(target_cur, (void *) source, nlong);
 405     } else {
 406         return OSHMEM_ERR_OUT_OF_RESOURCE;
 407     }
 408 
 409     SCOLL_VERBOSE(12,
 410                   "[#%d] Reduce algorithm: Recursive Doubling",
 411                   group->my_pe);
 412     SCOLL_VERBOSE(15,
 413                   "[#%d] pSync[0] = %ld floor2_proc = %d",
 414                   group->my_pe, pSync[0], floor2_proc);
 415 
 416     if (my_id >= floor2_proc) {
 417         
 418         peer_id = my_id - floor2_proc;
 419         peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 420 
 421         
 422         if (source == target) {
 423             SCOLL_VERBOSE(14,
 424                           "[#%d] wait for peer #%d is ready",
 425                           group->my_pe, peer_pe);
 426             value = SHMEM_SYNC_WAIT;
 427             rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
 428         }
 429 
 430         SCOLL_VERBOSE(14,
 431                       "[#%d] is extra send data to #%d",
 432                       group->my_pe, peer_pe);
 433         rc = MCA_SPML_CALL(put(oshmem_ctx_default, target, nlong, target_cur, peer_pe));
 434 
 435         MCA_SPML_CALL(fence(oshmem_ctx_default));
 436 
 437         SCOLL_VERBOSE(14,
 438                       "[#%d] is extra and signal to #%d",
 439                       group->my_pe, peer_pe);
 440         value = SHMEM_SYNC_RUN;
 441         rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 442 
 443         SCOLL_VERBOSE(14, "[#%d] wait", group->my_pe);
 444         value = SHMEM_SYNC_RUN;
 445         rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
 446     } else {
 447         
 448         if ((group->proc_count - floor2_proc) > my_id) {
 449             
 450             peer_id = my_id + floor2_proc;
 451             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 452 
 453             
 454             if (source == target) {
 455                 SCOLL_VERBOSE(14,
 456                               "[#%d] signal to #%d that I am ready",
 457                               group->my_pe, peer_pe);
 458                 value = SHMEM_SYNC_WAIT;
 459                 rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 460             }
 461 
 462             SCOLL_VERBOSE(14,
 463                           "[#%d] wait a signal from #%d",
 464                           group->my_pe, peer_pe);
 465             value = SHMEM_SYNC_RUN;
 466             rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
 467 
 468             
 469             if (rc == OSHMEM_SUCCESS) {
 470                 op->o_func.c_fn(target, target_cur, nlong / op->dt_size);
 471             }
 472         }
 473 
 474         
 475         exit_flag = floor2_proc - 1;
 476         pSync[0] = round;
 477         while (exit_flag && (rc == OSHMEM_SUCCESS)) {
 478             
 479             peer_id = my_id ^ (1 << round);
 480 
 481             
 482             exit_flag >>= 1;
 483             round++;
 484 
 485             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 486 
 487 #if 1 
 488 
 489 
 490 
 491             do {
 492                 MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 493             } while (value != (round - 1));
 494 
 495             SCOLL_VERBOSE(14,
 496                           "[#%d] round = %d send data to #%d",
 497                           group->my_pe, round, peer_pe);
 498             rc = MCA_SPML_CALL(put(oshmem_ctx_default, target, nlong, target_cur, peer_pe));
 499 
 500             MCA_SPML_CALL(fence(oshmem_ctx_default));
 501 
 502             SCOLL_VERBOSE(14,
 503                           "[#%d] round = %d signals to #%d",
 504                           group->my_pe, round, peer_pe);
 505             value = SHMEM_SYNC_RUN;
 506             rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 507 #endif
 508 
 509             SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
 510             value = SHMEM_SYNC_RUN;
 511             rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
 512 
 513             
 514             if (rc == OSHMEM_SUCCESS) {
 515                 op->o_func.c_fn(target, target_cur, nlong / op->dt_size);
 516             }
 517 
 518             pSync[0] = round;
 519         }
 520 
 521         memcpy(target, target_cur, nlong);
 522 
 523         
 524         if ((group->proc_count - floor2_proc) > my_id) {
 525             
 526             peer_id = my_id + floor2_proc;
 527             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 528 
 529             SCOLL_VERBOSE(14,
 530                           "[#%d] is extra send data to #%d",
 531                           group->my_pe, peer_pe);
 532             rc = MCA_SPML_CALL(put(oshmem_ctx_default, target, nlong, target_cur, peer_pe));
 533 
 534             MCA_SPML_CALL(fence(oshmem_ctx_default));
 535 
 536             SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe);
 537             value = SHMEM_SYNC_RUN;
 538             rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 539         }
 540     }
 541 
 542     free(target_cur);
 543 
 544     SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
 545 
 546     return rc;
 547 }
 548 
 549 static int _algorithm_linear(struct oshmem_group_t *group,
 550                               struct oshmem_op_t *op,
 551                               void *target,
 552                               const void *source,
 553                               size_t nlong,
 554                               long *pSync,
 555                               void *pWrk)
 556 {
 557     int rc = OSHMEM_SUCCESS;
 558     int i, rank, size;
 559     char *free_buffer = NULL;
 560     char *pml_buffer = NULL;
 561     char *inbuf;
 562     int peer_id = 0;
 563     int peer_pe = 0;
 564 
 565     
 566     rank = group->my_pe;
 567     size = group->proc_count;
 568     int root_id = size - 1;
 569     int root_pe = oshmem_proc_pe(group->proc_array[root_id]);
 570 
 571     SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Basic", group->my_pe);
 572 
 573     
 574 
 575     if (rank != root_pe) {
 576         rc = MCA_SPML_CALL(send((void*)source, nlong, root_pe, MCA_SPML_BASE_PUT_STANDARD));
 577     } else {
 578 
 579         
 580 
 581         if (size > 1) {
 582             free_buffer = (char*) malloc(nlong);
 583             if (NULL == free_buffer) {
 584                 return OSHMEM_ERR_OUT_OF_RESOURCE;
 585             }
 586             pml_buffer = free_buffer;
 587         }
 588 
 589         
 590 
 591         if (root_id == (size - 1)) {
 592             memcpy(target, (void *) source, nlong);
 593         } else {
 594             peer_id = size - 1;
 595             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 596             rc = MCA_SPML_CALL(recv(target, nlong, peer_pe));
 597         }
 598         if (OSHMEM_SUCCESS != rc) {
 599             if (NULL != free_buffer) {
 600                 free(free_buffer);
 601             }
 602             return rc;
 603         }
 604 
 605         
 606 
 607         for (i = size - 2; i >= 0; --i) {
 608             if (root_id == i) {
 609                 inbuf = (char*) source;
 610             } else {
 611                 peer_id = i;
 612                 peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 613                 rc = MCA_SPML_CALL(recv(pml_buffer, nlong, peer_pe));
 614                 if (OSHMEM_SUCCESS != rc) {
 615                     if (NULL != free_buffer) {
 616                         free(free_buffer);
 617                     }
 618                     return rc;
 619                 }
 620 
 621                 inbuf = pml_buffer;
 622             }
 623 
 624             
 625             op->o_func.c_fn(inbuf, target, nlong / op->dt_size);
 626         }
 627 
 628         if (NULL != free_buffer) {
 629             free(free_buffer);
 630         }
 631     }
 632 
 633     
 634     if (rc == OSHMEM_SUCCESS) {
 635         SCOLL_VERBOSE(14,
 636                       "[#%d] Broadcast from the root #%d",
 637                       group->my_pe, root_pe);
 638         rc = BCAST_FUNC(group,
 639                 root_pe,
 640                 target,
 641                 target,
 642                 nlong,
 643                 (pSync + 1),
 644                 true,
 645                 SCOLL_DEFAULT_ALG);
 646     }
 647 
 648     
 649     return rc;
 650 }
 651 
 652 static int _algorithm_log(struct oshmem_group_t *group,
 653                            struct oshmem_op_t *op,
 654                            void *target,
 655                            const void *source,
 656                            size_t nlong,
 657                            long *pSync,
 658                            void *pWrk)
 659 {
 660     int rc = OSHMEM_SUCCESS;
 661     int i, size, rank, vrank;
 662     int mask;
 663     void *sbuf = (void*) source;
 664     void *rbuf = target;
 665     char *free_buffer = NULL;
 666     char *free_rbuf = NULL;
 667     char *pml_buffer = NULL;
 668     char *snd_buffer = NULL;
 669     char *rcv_buffer = (char*) rbuf;
 670     int my_id = oshmem_proc_group_find_id(group, group->my_pe);
 671     int peer_id = 0;
 672     int peer_pe = 0;
 673     int root_id = 0;
 674     int root_pe = oshmem_proc_pe(group->proc_array[root_id]);
 675     int dim = 0;
 676 
 677     
 678     rank = group->my_pe;
 679     size = group->proc_count;
 680     dim = opal_cube_dim(group->proc_count);
 681     vrank = (my_id + size - root_id) % size;
 682 
 683     SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Log", rank);
 684 
 685     
 686 
 687 
 688     free_buffer = (char*) malloc(nlong);
 689     if (NULL == free_buffer) {
 690         return OSHMEM_ERR_OUT_OF_RESOURCE;
 691     }
 692 
 693     pml_buffer = free_buffer;
 694     rcv_buffer = pml_buffer;
 695 
 696     
 697 
 698 
 699     snd_buffer = (char*) sbuf;
 700 
 701     if (my_id != root_id && 0 == (vrank & 1)) {
 702         
 703 
 704 
 705         free_rbuf = (char*) malloc(nlong);
 706         if (NULL == free_rbuf) {
 707             rc = OSHMEM_ERR_OUT_OF_RESOURCE;
 708             goto cleanup_and_return;
 709         }
 710         rbuf = free_rbuf;
 711     }
 712 
 713     
 714 
 715 
 716     for (i = 0, mask = 1; i < dim; ++i, mask <<= 1) {
 717 
 718         
 719         if (vrank & mask) {
 720             peer_id = vrank & ~mask;
 721             peer_id = (peer_id + root_id) % size;
 722             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 723 
 724             rc = MCA_SPML_CALL(send((void*)snd_buffer, nlong, peer_pe, MCA_SPML_BASE_PUT_STANDARD));
 725             if (OSHMEM_SUCCESS != rc) {
 726                 goto cleanup_and_return;
 727             }
 728             snd_buffer = (char*) rbuf;
 729             break;
 730         }
 731 
 732         
 733 
 734 
 735         else {
 736             peer_id = vrank | mask;
 737             if (peer_id >= size) {
 738                 continue;
 739             }
 740             peer_id = (peer_id + root_id) % size;
 741             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 742 
 743             
 744 
 745 
 746 
 747 
 748 
 749 
 750 
 751 
 752             rc = MCA_SPML_CALL(recv(rcv_buffer, nlong, peer_pe));
 753             if (OSHMEM_SUCCESS != rc) {
 754                 goto cleanup_and_return;
 755             }
 756             
 757 
 758 
 759             if (snd_buffer != sbuf) {
 760                 
 761                 op->o_func.c_fn(rcv_buffer, pml_buffer, nlong / op->dt_size);
 762             } else {
 763                 
 764 
 765 
 766 
 767 
 768                 {
 769                     op->o_func.c_fn(sbuf, pml_buffer, nlong / op->dt_size);
 770                 }
 771                 
 772                 snd_buffer = pml_buffer;
 773                 
 774 
 775                 rcv_buffer = (char*) rbuf;
 776             }
 777         }
 778     }
 779 
 780     
 781     rc = OSHMEM_SUCCESS;
 782     if (0 == vrank) {
 783         if (root_id == my_id) {
 784             memcpy(rbuf, snd_buffer, nlong);
 785         } else {
 786             rc = MCA_SPML_CALL(send((void*)snd_buffer, nlong, root_pe, MCA_SPML_BASE_PUT_STANDARD));
 787         }
 788     } else if (my_id == root_id) {
 789         rc = MCA_SPML_CALL(recv(rcv_buffer, nlong, root_pe));
 790         if (rcv_buffer != rbuf) {
 791             op->o_func.c_fn(rcv_buffer, rbuf, nlong / op->dt_size);
 792         }
 793     }
 794 
 795     cleanup_and_return: if (NULL != free_buffer) {
 796         free(free_buffer);
 797     }
 798     if (NULL != free_rbuf) {
 799         free(free_rbuf);
 800     }
 801 
 802     
 803     if (rc == OSHMEM_SUCCESS) {
 804         SCOLL_VERBOSE(14,
 805                       "[#%d] Broadcast from the root #%d",
 806                       rank, root_pe);
 807         rc = BCAST_FUNC(group,
 808                 root_pe,
 809                 target,
 810                 target,
 811                 nlong,
 812                 (pSync + 1),
 813                 true,
 814                 SCOLL_DEFAULT_ALG);
 815     }
 816 
 817     
 818     return rc;
 819 }