root/oshmem/mca/scoll/basic/scoll_basic_barrier.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_scoll_basic_barrier
  2. _algorithm_central_counter
  3. _algorithm_tournament
  4. _algorithm_recursive_doubling
  5. _algorithm_dissemination
  6. _algorithm_basic
  7. _algorithm_adaptive

   1 /*
   2  * Copyright (c) 2013-2015 Mellanox Technologies, Inc.
   3  *                         All rights reserved.
   4  * Copyright (c) 2019      Research Organization for Information Science
   5  *                         and Technology (RIST).  All rights reserved.
   6  * $COPYRIGHT$
   7  *
   8  * Additional copyrights may follow
   9  *
  10  * $HEADER$
  11  */
  12 
  13 #include "oshmem_config.h"
  14 #include <stdio.h>
  15 #include <stdlib.h>
  16 
  17 #include "oshmem/constants.h"
  18 #include "oshmem/mca/spml/spml.h"
  19 #include "oshmem/mca/scoll/scoll.h"
  20 #include "oshmem/mca/scoll/base/base.h"
  21 #include "oshmem/proc/proc.h"
  22 #include "scoll_basic.h"
  23 
  24 static int _algorithm_central_counter(struct oshmem_group_t *group,
  25                                        long *pSync);
  26 static int _algorithm_tournament(struct oshmem_group_t *group, long *pSync);
  27 static int _algorithm_recursive_doubling(struct oshmem_group_t *group,
  28                                           long *pSync);
  29 static int _algorithm_dissemination(struct oshmem_group_t *group, long *pSync);
  30 static int _algorithm_basic(struct oshmem_group_t *group, long *pSync);
  31 static int _algorithm_adaptive(struct oshmem_group_t *group, long *pSync);
  32 
  33 int mca_scoll_basic_barrier(struct oshmem_group_t *group, long *pSync, int alg)
  34 {
  35     int rc = OSHMEM_SUCCESS;
  36 
  37     /* Arguments validation */
  38     if (!group || !pSync) {
  39         SCOLL_ERROR("Active set (group) of PE is not defined");
  40         rc = OSHMEM_ERR_BAD_PARAM;
  41     }
  42 
  43     if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
  44         if (pSync) {
  45             alg = (alg == SCOLL_DEFAULT_ALG ?
  46                     mca_scoll_basic_param_barrier_algorithm : alg);
  47             switch (alg) {
  48             case SCOLL_ALG_BARRIER_CENTRAL_COUNTER:
  49                 {
  50                     rc = _algorithm_central_counter(group, pSync);
  51                     break;
  52                 }
  53             case SCOLL_ALG_BARRIER_TOURNAMENT:
  54                 {
  55                     rc = _algorithm_tournament(group, pSync);
  56                     break;
  57                 }
  58             case SCOLL_ALG_BARRIER_RECURSIVE_DOUBLING:
  59                 {
  60                     rc = _algorithm_recursive_doubling(group, pSync);
  61                     break;
  62                 }
  63             case SCOLL_ALG_BARRIER_DISSEMINATION:
  64                 {
  65                     rc = _algorithm_dissemination(group, pSync);
  66                     break;
  67                 }
  68             case SCOLL_ALG_BARRIER_BASIC:
  69                 {
  70                     rc = _algorithm_basic(group, pSync);
  71                     break;
  72                 }
  73             case SCOLL_ALG_BARRIER_ADAPTIVE:
  74                 {
  75                     rc = _algorithm_adaptive(group, pSync);
  76                     break;
  77                 }
  78             default:
  79                 {
  80                     rc = _algorithm_recursive_doubling(group, pSync);
  81                 }
  82             }
  83         } else {
  84             SCOLL_ERROR("Incorrect argument pSync");
  85             rc = OSHMEM_ERR_BAD_PARAM;
  86         }
  87     }
  88 
  89     return rc;
  90 }
  91 
  92 /*
  93  This algorithm is quite simple and straightforward. But because of it�s obvious simplicity and
  94  the naive prove for correctness it is implemented quite often. One node asks peers if they are
  95  achieve barrier state. When all processors are ready it signals to go ahead.
  96  Outlay:
  97  NP-1 competing network transfers are needed to implement the counter
  98  The memory usage is constant (1 byte) per node.
  99  */
 100 static int _algorithm_central_counter(struct oshmem_group_t *group,
 101                                        long *pSync)
 102 {
 103     int rc = OSHMEM_SUCCESS;
 104     long value = SHMEM_SYNC_INIT;
 105     int root_id = 0;
 106     int PE_root = oshmem_proc_pe(group->proc_array[root_id]);
 107     int i = 0;
 108 
 109     SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Central Counter", group->my_pe);
 110     SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
 111 
 112     /* Set current state as WAIT */
 113     pSync[0] = SHMEM_SYNC_WAIT;
 114 
 115     /* Root processes synchronization */
 116     if (PE_root == group->my_pe) {
 117         int pe_cur = 0;
 118         long wait_pe_count = 0;
 119         int* wait_pe_array = NULL;
 120 
 121         wait_pe_array = malloc(sizeof(*wait_pe_array) * group->proc_count);
 122         if (wait_pe_array) {
 123             SCOLL_VERBOSE(14, "[#%d] PE is the root", group->my_pe);
 124 
 125             wait_pe_count = group->proc_count;
 126             for (i = 0; i < group->proc_count; i++) {
 127                 wait_pe_array[i] = oshmem_proc_pe(group->proc_array[i]);
 128             }
 129             wait_pe_array[root_id] = OSHMEM_PE_INVALID;
 130             wait_pe_count--;
 131 
 132             while (wait_pe_count) {
 133                 for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS);
 134                         i++) {
 135                     pe_cur = wait_pe_array[i];
 136                     if (pe_cur != OSHMEM_PE_INVALID) {
 137                         rc = MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, pe_cur));
 138                         if ((rc == OSHMEM_SUCCESS)
 139                                 && (value == SHMEM_SYNC_WAIT)) {
 140                             wait_pe_array[i] = OSHMEM_PE_INVALID;
 141                             wait_pe_count--;
 142                             SCOLL_VERBOSE(14,
 143                                           "[#%d] PE#%d is ready (wait list counter: %d)",
 144                                           group->my_pe, pe_cur, (int)wait_pe_count);
 145                         }
 146                     }
 147                 }
 148             }
 149 
 150             SCOLL_VERBOSE(14, "[#%d] PE signals to all", group->my_pe);
 151             value = SHMEM_SYNC_RUN;
 152             for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS);
 153                     i++) {
 154                 pe_cur = oshmem_proc_pe(group->proc_array[i]);
 155                 if (pe_cur != PE_root) {
 156                     rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, pe_cur));
 157                 }
 158             }
 159 
 160             free(wait_pe_array);
 161         } else {
 162             rc = OSHMEM_ERR_OUT_OF_RESOURCE;
 163         }
 164 
 165         /* Possibly this is unnecessary...
 166          But imagine the scenario when you have 2 sequential barriers and the root PE is the fastest one.
 167          The root could leave the first barrier and in the second barrier it could get SHMEM_SYNC_WAIT value on
 168          remote node before the remote node receives its SHMEM_SYNC_RUN value in the first barrier
 169          */
 170         MCA_SPML_CALL(quiet(oshmem_ctx_default));
 171     }
 172     /* Wait for RUN signal */
 173     else {
 174         SCOLL_VERBOSE(14,
 175                       "[#%d] PE waits for a signal from root",
 176                       group->my_pe);
 177 
 178         value = SHMEM_SYNC_RUN;
 179         rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
 180     }
 181 
 182     /* Restore initial values */
 183     SCOLL_VERBOSE(12,
 184                   "[#%d] Restore special synchronization array",
 185                   group->my_pe);
 186     for (i = 0; i < _SHMEM_BARRIER_SYNC_SIZE; i++) {
 187         pSync[i] = _SHMEM_SYNC_VALUE;
 188     }
 189 
 190     SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
 191 
 192     return rc;
 193 }
 194 
 195 /*
 196  The Tournament Barrier, proposed by Hengsen, Finkel and Manser is mostly suitable for shared memory
 197  multiprocessors because it benefits from several caching mechanisms.
 198  The algorithm is similar to a tournament game. In each round two
 199  nodes play against each other. The winner is known in advance and waits until the looser arrives. The
 200  winners play against each other in the next round. The overall winner (the champion) notifies all others
 201  about the end of the barrier.
 202  Outlay:
 203  The game scales with log2(NP) and uses 1 byte of memory.
 204  */
 205 static int _algorithm_tournament(struct oshmem_group_t *group, long *pSync)
 206 {
 207     int rc = OSHMEM_SUCCESS;
 208     int round = 0;
 209     int exit_flag = group->proc_count - 1;
 210     long value = SHMEM_SYNC_INIT;
 211     int my_id = oshmem_proc_group_find_id(group, group->my_pe);
 212     int peer_id = 0;
 213     int peer_pe = 0;
 214     int i = 0;
 215 
 216     SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Tournament", group->my_pe);
 217     SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
 218 
 219     /* Set current state as WAIT */
 220     pSync[0] = SHMEM_SYNC_WAIT;
 221 
 222     while (exit_flag && (rc == OSHMEM_SUCCESS)) {
 223         /* Define a peer for competition */
 224         peer_id = my_id ^ (1 << round);
 225 
 226         /* Update exit condition and round counter */
 227         exit_flag >>= 1;
 228         round++;
 229 
 230         /* Do not have peer for tournament */
 231         if (peer_id >= group->proc_count)
 232             continue;
 233 
 234         if (my_id < peer_id) {
 235             pSync[0] = peer_id;
 236             value = my_id;
 237 
 238             SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
 239             rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
 240         } else {
 241             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 242 
 243 #if 1 /* It is ugly implementation of compare and swap operation
 244          Usage of this hack does not give performance improvement but
 245          it is expected that shmem_long_cswap() will make it faster.
 246        */
 247             do {
 248                 MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 249             } while (value != my_id);
 250 
 251             SCOLL_VERBOSE(14,
 252                           "[#%d] round = %d signals to #%d",
 253                           group->my_pe, round, peer_pe);
 254             value = peer_id;
 255             rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 256 #else
 257             SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe);
 258             do
 259             {
 260                 rc = MCA_ATOMIC_CALL(cswap((void*)pSync, (void*)&value, (const void*)&my_id, (const void*)&peer_id, sizeof(value), peer_pe));
 261             }while (value != my_id);
 262 #endif
 263             SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
 264             value = SHMEM_SYNC_RUN;
 265             rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
 266 
 267             break;
 268         }
 269     }
 270 
 271     /* Restore initial values */
 272     SCOLL_VERBOSE(12,
 273                   "[#%d] Restore special synchronization array",
 274                   group->my_pe);
 275     for (i = 0; i < _SHMEM_BARRIER_SYNC_SIZE; i++) {
 276         pSync[i] = _SHMEM_SYNC_VALUE;
 277     }
 278 
 279     /* Send result to all PE in group */
 280     if ((my_id == 0) && (rc == OSHMEM_SUCCESS)) {
 281         SCOLL_VERBOSE(14, "[#%d] signals to all", group->my_pe);
 282 
 283         value = SHMEM_SYNC_RUN;
 284         for (peer_id = 1;
 285                 (peer_id < group->proc_count) && (rc == OSHMEM_SUCCESS);
 286                 peer_id++) {
 287             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 288             rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 289         }
 290     }
 291 
 292     SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
 293 
 294     return rc;
 295 }
 296 
 297 /*
 298  Pairwise Exchange With Recursive Doubling.
 299  Rinka Gupta, Vinod Tipparaju, Jare Nieplocha, and Dhabaleswar Panda. Efficient Barrier
 300  using Remote Memory Operations on VIA-Based Clusters. In 2002 IEEE International
 301  Conference on Cluster Computing (CLUSTER 2002), page 83. IEEE Computer Society, 2002.
 302  Outlay:
 303  The algorithm uses a maximum of log2(NP) + 2 network writes and P bytes memory per node.
 304  */
 305 static int _algorithm_recursive_doubling(struct oshmem_group_t *group,
 306                                           long *pSync)
 307 {
 308     int rc = OSHMEM_SUCCESS;
 309     int round = 0;
 310     int floor2_proc = 0;
 311     int exit_flag = 0;
 312     long value = SHMEM_SYNC_INIT;
 313     int my_id = oshmem_proc_group_find_id(group, group->my_pe);
 314     int peer_id = 0;
 315     int peer_pe = 0;
 316     int i = 0;
 317 
 318     floor2_proc = 1;
 319     i = group->proc_count;
 320     i >>= 1;
 321     while (i) {
 322         i >>= 1;
 323         floor2_proc <<= 1;
 324     }
 325 
 326     SCOLL_VERBOSE(12,
 327                   "[#%d] Barrier algorithm: Recursive Doubling",
 328                   group->my_pe);
 329     SCOLL_VERBOSE(15,
 330                   "[#%d] pSync[0] = %ld floor2_proc = %d",
 331                   group->my_pe, pSync[0], floor2_proc);
 332 
 333     if (my_id >= floor2_proc) {
 334         /* I am in extra group, my partner is node (my_id-y) in basic group */
 335         peer_id = my_id - floor2_proc;
 336         peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 337 
 338         SCOLL_VERBOSE(14,
 339                       "[#%d] is extra and signal to #%d",
 340                       group->my_pe, peer_pe);
 341         value = SHMEM_SYNC_WAIT;
 342         rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 343 
 344         SCOLL_VERBOSE(14, "[#%d] wait", group->my_pe);
 345         value = SHMEM_SYNC_RUN;
 346         rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
 347 
 348         /* Restore initial values */
 349         SCOLL_VERBOSE(12,
 350                       "[#%d] Restore special synchronization array",
 351                       group->my_pe);
 352         for (i = 0; i < _SHMEM_BARRIER_SYNC_SIZE; i++) {
 353             pSync[i] = _SHMEM_SYNC_VALUE;
 354         }
 355     } else {
 356         /* Wait for a peer from extra group */
 357         if ((group->proc_count - floor2_proc) > my_id) {
 358             /* I am in basic group, my partner is node (my_id+y) in extra group */
 359             peer_id = my_id + floor2_proc;
 360             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 361 
 362             SCOLL_VERBOSE(14,
 363                           "[#%d] wait a signal from #%d",
 364                           group->my_pe, peer_pe);
 365             value = SHMEM_SYNC_WAIT;
 366             rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
 367         }
 368 
 369         /* Pairwise exchange  */
 370         exit_flag = floor2_proc - 1;
 371         pSync[0] = round;
 372         while (exit_flag && (rc == OSHMEM_SUCCESS)) {
 373             /* Define a peer for competition */
 374             peer_id = my_id ^ (1 << round);
 375 
 376             /* Update exit condition and round counter */
 377             exit_flag >>= 1;
 378             round++;
 379 
 380             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 381 
 382 #if 1 /* It is ugly implementation of compare and swap operation
 383          Usage of this hack does not give performance improvement but
 384          it is expected that shmem_long_cswap() will make it faster.
 385        */
 386             do {
 387                 MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 388             } while (value != (round - 1));
 389 
 390             SCOLL_VERBOSE(14,
 391                           "[#%d] round = %d signals to #%d",
 392                           group->my_pe, round, peer_pe);
 393             value = round;
 394             rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 395 #else
 396             SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe);
 397             {
 398                 long cond = round - 1;
 399                 do
 400                 {
 401                     rc = MCA_ATOMIC_CALL(cswap((void*)pSync, (void*)&value, (const void*)&cond, (const void*)&round, sizeof(value), peer_pe));
 402                 }while (value != (round-1));
 403             }
 404 #endif
 405 
 406             SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
 407             value = round;
 408             rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_GE, (void*)&value, SHMEM_LONG));
 409         }
 410 
 411         /* Restore initial values */
 412         SCOLL_VERBOSE(12,
 413                       "[#%d] Restore special synchronization array",
 414                       group->my_pe);
 415         for (i = 0; i < _SHMEM_BARRIER_SYNC_SIZE; i++) {
 416             pSync[i] = _SHMEM_SYNC_VALUE;
 417         }
 418 
 419         /* Notify a peer from extra group */
 420         if ((group->proc_count - floor2_proc) > my_id) {
 421             /* I am in basic group, my partner is node (my_id+y) in extra group */
 422             peer_id = my_id + floor2_proc;
 423             peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 424 
 425             SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe);
 426             value = SHMEM_SYNC_RUN;
 427             rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 428         }
 429     }
 430 
 431     SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
 432 
 433     return rc;
 434 }
 435 
 436 /*
 437  The Dissemination Barrier, introduced by Hengsen, Finkel and Manser in 1998.
 438  The algorithm is mostly an improvement of the Butterfly Barrier for non power of two processor counts.
 439  It uses the same pairwise synchronization but with other partners.
 440  Outlay:
 441  The game scales with log2(NP) and uses 1 byte of memory.
 442  */
 443 static int _algorithm_dissemination(struct oshmem_group_t *group, long *pSync)
 444 {
 445     int rc = OSHMEM_SUCCESS;
 446     int round = 0;
 447     int log2_proc = 0;
 448     long value = SHMEM_SYNC_INIT;
 449     int my_id = oshmem_proc_group_find_id(group, group->my_pe);
 450     int peer_id = 0;
 451     int peer_pe = 0;
 452     int i = 0;
 453 
 454     log2_proc = scoll_log2((unsigned long) group->proc_count);
 455 
 456     SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Dissemination", group->my_pe);
 457     SCOLL_VERBOSE(15,
 458                   "[#%d] pSync[0] = %ld floor2_proc = %d",
 459                   group->my_pe, pSync[0], log2_proc);
 460 
 461     pSync[0] = round;
 462     for (round = 0; (round <= log2_proc) && (rc == OSHMEM_SUCCESS); round++) {
 463         /* Define a peer to send signal */
 464         peer_id = (my_id + (1 << round)) % group->proc_count;
 465 
 466         peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
 467 
 468 #if 1 /* It is ugly implementation of compare and swap operation
 469          Usage of this hack does not give performance improvement but
 470          it is expected that shmem_long_cswap() will make it faster.
 471        */
 472         do {
 473             MCA_SPML_CALL(get(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 474         } while (value != round);
 475 
 476         SCOLL_VERBOSE(14,
 477                       "[#%d] round = %d signals to #%d",
 478                       group->my_pe, round, peer_pe);
 479         value = round + 1;
 480         rc = MCA_SPML_CALL(put(oshmem_ctx_default, (void*)pSync, sizeof(value), (void*)&value, peer_pe));
 481 #endif
 482 
 483         SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
 484         value = round + 1;
 485         rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_GE, (void*)&value, SHMEM_LONG));
 486     }
 487 
 488     /* Restore initial values */
 489     SCOLL_VERBOSE(12,
 490                   "[#%d] Restore special synchronization array",
 491                   group->my_pe);
 492     for (i = 0; i < _SHMEM_BARRIER_SYNC_SIZE; i++) {
 493         pSync[i] = _SHMEM_SYNC_VALUE;
 494     }
 495 
 496     SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
 497 
 498     return rc;
 499 }
 500 
 501 static int _algorithm_basic(struct oshmem_group_t *group, long *pSync)
 502 {
 503     int rc = OSHMEM_SUCCESS;
 504     int root_id = 0;
 505     int PE_root = oshmem_proc_pe(group->proc_array[root_id]);
 506     int i = 0;
 507 
 508     SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Basic", group->my_pe);
 509 
 510     if (PE_root != group->my_pe) {
 511         rc = MCA_SPML_CALL(send(NULL, 0, PE_root, MCA_SPML_BASE_PUT_STANDARD));
 512         if (OSHMEM_SUCCESS != rc) {
 513             return rc;
 514         }
 515 
 516         rc = MCA_SPML_CALL(recv(NULL, 0, PE_root));
 517         if (OSHMEM_SUCCESS != rc) {
 518             return rc;
 519         }
 520     }
 521 
 522     /* The root collects and broadcasts the messages. */
 523 
 524     else {
 525         int pe_cur = 0;
 526 
 527         for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) {
 528             pe_cur = oshmem_proc_pe(group->proc_array[i]);
 529             if (pe_cur != PE_root) {
 530                 rc = MCA_SPML_CALL(recv(NULL, 0, pe_cur)); 
 531             }
 532             if (OSHMEM_SUCCESS != rc) {
 533                 return rc;
 534             }
 535         }
 536 
 537         for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) {
 538             pe_cur = oshmem_proc_pe(group->proc_array[i]);
 539             if (pe_cur != PE_root) {
 540                 rc = MCA_SPML_CALL(send(NULL, 0, pe_cur, MCA_SPML_BASE_PUT_STANDARD));
 541             }
 542             if (OSHMEM_SUCCESS != rc) {
 543                 return rc;
 544             }
 545         }
 546     }
 547 
 548     return rc;
 549 }
 550 
 551 static int _algorithm_adaptive(struct oshmem_group_t *group, long *pSync)
 552 {
 553     int rc = OSHMEM_SUCCESS;
 554     bool local_peers_only = true;
 555 
 556     SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Adaptive", group->my_pe);
 557 
 558     /* check if we have only local peers */
 559     {
 560         int i = 0;
 561         int my_id = oshmem_proc_group_find_id(group, group->my_pe);
 562 
 563         for (i = 0; i < group->proc_count; i++) {
 564             if (i == my_id)
 565                 continue;
 566 
 567             if (!OPAL_PROC_ON_LOCAL_NODE(group->proc_array[i]->super.proc_flags)) {
 568                 local_peers_only = false;
 569                 break;
 570             }
 571         }
 572     }
 573 
 574     /* Select algorithm we use:
 575      * use send/recv way for group in the same node and for np < 32
 576      * otherwise use put/get way
 577      */
 578     if (local_peers_only || (group->proc_count < 32)) {
 579         rc = _algorithm_basic(group, pSync);
 580     } else {
 581         rc = _algorithm_recursive_doubling(group, pSync);
 582     }
 583 
 584     return rc;
 585 }

/* [<][>][^][v][top][bottom][index][help] */