root/ompi/mca/coll/sm/coll_sm_barrier.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_coll_sm_barrier_intra

   1 /*
   2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2014 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2015 Cisco Systems, Inc.  All rights reserved.
  13  * $COPYRIGHT$
  14  *
  15  * Additional copyrights may follow
  16  *
  17  * $HEADER$
  18  */
  19 /** @file */
  20 
  21 #include "ompi_config.h"
  22 
  23 #include "ompi/constants.h"
  24 #include "ompi/communicator/communicator.h"
  25 #include "ompi/mca/coll/coll.h"
  26 #include "opal/sys/atomic.h"
  27 #include "coll_sm.h"
  28 
  29 /**
  30  * Shared memory barrier.
  31  *
  32  * Tree-based algorithm for a barrier: a fan in to rank 0 followed by
  33  * a fan out using the barrier segments in the shared memory area.
  34  *
  35  * There are 2 sets of barrier buffers -- since there can only be, at
  36  * most, 2 outstanding barriers at any time, there is no need for more
  37  * than this.  The generalized in-use flags, control, and data
  38  * segments are not used.
  39  *
  40  * The general algorithm is for a given process to wait for its N
  41  * children to fan in by monitoring a uint32_t in its barrier "in"
  42  * buffer.  When this value reaches N (i.e., each of the children have
  43  * atomically incremented the value), then the process atomically
  44  * increases the uint32_t in its parent's "in" buffer.  Then the
  45  * process waits for the parent to set a "1" in the process' "out"
  46  * buffer.  Once this happens, the process writes a "1" in each of its
  47  * children's "out" buffers, and returns.
  48  *
  49  * There's corner cases, of course, such as the root that has no
  50  * parent, and the leaves that have no children.  But that's the
  51  * general idea.
  52  */
  53 int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm,
  54                               mca_coll_base_module_t *module)
  55 {
  56     int rank, buffer_set;
  57     mca_coll_sm_comm_t *data;
  58     uint32_t i, num_children;
  59     volatile uint32_t *me_in, *me_out, *children = NULL;
  60     opal_atomic_uint32_t *parent;
  61     int uint_control_size;
  62     mca_coll_sm_module_t *sm_module = (mca_coll_sm_module_t*) module;
  63 
  64     /* Lazily enable the module the first time we invoke a collective
  65        on it */
  66     if (!sm_module->enabled) {
  67         int ret;
  68         if (OMPI_SUCCESS != (ret = ompi_coll_sm_lazy_enable(module, comm))) {
  69             return ret;
  70         }
  71     }
  72 
  73     uint_control_size =
  74         mca_coll_sm_component.sm_control_size / sizeof(uint32_t);
  75     data = sm_module->sm_comm_data;
  76     rank = ompi_comm_rank(comm);
  77     num_children = data->mcb_tree[rank].mcstn_num_children;
  78     buffer_set = ((data->mcb_barrier_count++) % 2) * 2;
  79     me_in = &data->mcb_barrier_control_me[buffer_set];
  80     me_out = (uint32_t*)
  81         (((char*) me_in) + mca_coll_sm_component.sm_control_size);
  82 
  83     /* Wait for my children to write to my *in* buffer */
  84 
  85     if (0 != num_children) {
  86         /* Get children *out* buffer */
  87         children = data->mcb_barrier_control_children + buffer_set +
  88             uint_control_size;
  89         SPIN_CONDITION(*me_in == num_children, exit_label1);
  90         *me_in = 0;
  91     }
  92 
  93     /* Send to my parent and wait for a response (don't poll on
  94        parent's out buffer -- that would cause a lot of network
  95        traffic / contention / faults / etc.  Instead, children poll on
  96        local memory and therefore only num_children messages are sent
  97        across the network [vs. num_children *each* time all the
  98        children poll] -- i.e., the memory is only being polled by one
  99        process, and it is only changed *once* by an external
 100        process) */
 101 
 102     if (0 != rank) {
 103         /* Get parent *in* buffer */
 104         parent = &data->mcb_barrier_control_parent[buffer_set];
 105         opal_atomic_add (parent, 1);
 106 
 107         SPIN_CONDITION(0 != *me_out, exit_label2);
 108         *me_out = 0;
 109     }
 110 
 111     /* Send to my children */
 112 
 113     for (i = 0; i < num_children; ++i) {
 114         children[i * uint_control_size * 4] = 1;
 115     }
 116 
 117     /* All done!  End state of the control segment:
 118 
 119        me_in: 0
 120        me_out: 0
 121     */
 122 
 123     return OMPI_SUCCESS;
 124 }

/* [<][>][^][v][top][bottom][index][help] */