root/ompi/mca/coll/portals4/coll_portals4_barrier.c


DEFINITIONS

This source file includes the following definitions.
  1. barrier_hypercube_top
  2. barrier_hypercube_bottom
  3. ompi_coll_portals4_barrier_intra
  4. ompi_coll_portals4_ibarrier_intra
  5. ompi_coll_portals4_ibarrier_intra_fini

/*
 * Copyright (c) 2013-2015 Sandia National Laboratories. All rights reserved.
 * Copyright (c) 2014      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * Copyright (c) 2015      Bull SAS.  All rights reserved.
 * Copyright (c) 2017      IBM Corporation.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */


#include "ompi_config.h"

#include "coll_portals4.h"
#include "coll_portals4_request.h"

#include "mpi.h"
#include "ompi/constants.h"
#include "opal/util/bit_ops.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"

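/*
 * barrier_hypercube_top: sets up one barrier instance.  Ranks are arranged
 * in a tree built from a hypercube; each rank posts a zero-length match
 * list entry attached to a counting event, a triggered put to its parent
 * that fires once all of its children have checked in, and triggered puts
 * that release its children once the parent's answer (or, at the root, the
 * last child's notification) has arrived.  The fan-in/fan-out therefore
 * runs entirely as chained triggered operations.
 */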
static int
barrier_hypercube_top(struct ompi_communicator_t *comm,
        ompi_coll_portals4_request_t *request,
        mca_coll_portals4_module_t *portals4_module)
{
    bool is_sync = request->is_sync;
    int ret, i, dim, hibit, mask, num_msgs;
    int size = ompi_comm_size(comm);
    int rank = ompi_comm_rank(comm);
    ptl_me_t me;
    size_t count;
    ptl_match_bits_t match_bits_rtr, match_bits;
    ptl_ct_event_t event;
    ptl_handle_md_t md_h;

    md_h = mca_coll_portals4_component.zero_md_h;

    request->type = OMPI_COLL_PORTALS4_TYPE_BARRIER;

    count = opal_atomic_add_fetch_size_t(&portals4_module->coll_count, 1);

    ret = PtlCTAlloc(mca_coll_portals4_component.ni_h,
            &request->u.barrier.rtr_ct_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlCTAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    }

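    /* Two match-bit patterns are built from the communicator CID, the
     * collective type, and the per-module collective count (a sequence
     * number distinguishing successive collectives on this communicator):
     * match_bits_rtr carries the ready-to-receive (RTR) flag and is sent
     * up the tree, while match_bits does not and is sent back down.  The
     * ME below ignores the RTR bit (ignore_bits = COLL_PORTALS4_RTR_MASK),
     * so both kinds of puts count on the same counting event. */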
    COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_cid(comm),
            0, 1, COLL_PORTALS4_BARRIER, 0, count);

    COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm),
            0, 0, COLL_PORTALS4_BARRIER, 0, count);

    /* Build "tree" out of hypercube */
    dim = comm->c_cube_dim;
    hibit = opal_hibit(rank, dim);
    --dim;
    /* calculate number of children to receive from */
    num_msgs = get_nchildren(dim + 1, hibit, rank, size);

    /* receive space */
    memset(&me, 0, sizeof(ptl_me_t));
    me.start = NULL;
    me.length = 0;
    me.ct_handle = request->u.barrier.rtr_ct_h;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE |
            PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE |
            PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_CT_OVERFLOW;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = match_bits;
    me.ignore_bits = COLL_PORTALS4_RTR_MASK;
    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
            mca_coll_portals4_component.pt_idx,
            &me,
            PTL_PRIORITY_LIST,
            NULL,
            &request->u.barrier.data_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMEAppend failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    /* send to parent when children have sent to us */
    if (rank > 0) {
        int parent = rank & ~(1 << hibit);

        ret = PtlTriggeredPut(md_h,
                0,
                0,
                PTL_NO_ACK_REQ,
                ompi_coll_portals4_get_peer(comm, parent),
                mca_coll_portals4_component.pt_idx,
                match_bits_rtr,
                0,
                NULL,
                0,
                request->u.barrier.rtr_ct_h,
                num_msgs);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlTriggeredPut failed: %d\n",
                    __FILE__, __LINE__, ret);
            return OMPI_ERROR;
        }

        /* we'll need to wait for the parent response before the next set of comms */
        num_msgs++;
    }

    /* send to children when parent (or all children if root) has sent to us */
    for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
        int peer = rank | mask;
        if (peer < size) {
            ret = PtlTriggeredPut(md_h,
                    0,
                    0,
                    PTL_NO_ACK_REQ,
                    ompi_coll_portals4_get_peer(comm, peer),
                    mca_coll_portals4_component.pt_idx,
                    match_bits,
                    0,
                    NULL,
                    0,
                    request->u.barrier.rtr_ct_h,
                    num_msgs);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                        "%s:%d: PtlTriggeredPut failed: %d\n",
                        __FILE__, __LINE__, ret);
                return OMPI_ERROR;
            }
        }
    }

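    /* Completion: in the blocking case, wait locally on the counting event;
     * in the non-blocking case, arm one more triggered put to our own
     * finish portal table entry, carrying the request pointer in hdr_data
     * so the event can later be matched back to this request. */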
    if (is_sync) {
        /* Each process has a pending PtlTriggeredPut.  To be sure that put
           is triggered, we must call PtlTriggeredCTInc twice; otherwise we
           could free the CT too early and the put would never fire. */

        ptl_ct_event_t ct_inc;

        ct_inc.success = 1;
        ct_inc.failure = 0;

        if ((ret = PtlTriggeredCTInc(request->u.barrier.rtr_ct_h, ct_inc,
                request->u.barrier.rtr_ct_h, num_msgs)) != 0) {
            return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
        }

        if ((ret = PtlTriggeredCTInc(request->u.barrier.rtr_ct_h, ct_inc,
                request->u.barrier.rtr_ct_h, num_msgs + 1)) != 0) {
            return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
        }

        ret = PtlCTWait(request->u.barrier.rtr_ct_h, num_msgs + 2, &event);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlCTWait failed: %d\n",
                    __FILE__, __LINE__, ret);
            return OMPI_ERROR;
        }
    }
    else {
        /* Send a put to self when we've received all our messages... */
        ret = PtlTriggeredPut(md_h,
                0,
                0,
                PTL_NO_ACK_REQ,
                ompi_coll_portals4_get_peer(comm, rank),
                mca_coll_portals4_component.finish_pt_idx,
                0,
                0,
                NULL,
                (uintptr_t) request,
                request->u.barrier.rtr_ct_h,
                num_msgs);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlTriggeredPut failed: %d\n",
                    __FILE__, __LINE__, ret);
            return OMPI_ERROR;
        }
    }

    return OMPI_SUCCESS;
}

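/*
 * barrier_hypercube_bottom: releases the Portals4 resources (the match
 * list entry and the counting event) once the barrier has completed.
 * PtlMEUnlink is retried for as long as the ME is still in use.
 */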
static int
barrier_hypercube_bottom(ompi_coll_portals4_request_t *request)
{
    int ret;

    /* cleanup */
    do {
        ret = PtlMEUnlink(request->u.barrier.data_me_h);
    } while (PTL_IN_USE == ret);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMEUnlink failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlCTFree(request->u.barrier.rtr_ct_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlCTFree failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}

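/*
 * Blocking barrier: the request is marked synchronous, so
 * barrier_hypercube_top waits on the counting event before returning;
 * the resources are then torn down and the request returned right away.
 */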
int
ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
        mca_coll_base_module_t *module)
{
    int ret;
    mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module;
    ompi_coll_portals4_request_t *request;


    OMPI_COLL_PORTALS4_REQUEST_ALLOC(comm, request);
    if (NULL == request) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: request alloc failed\n",
                __FILE__, __LINE__);
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    }

    request->is_sync = true;

    ret = barrier_hypercube_top(comm, request, portals4_module);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: ompi_coll_portals4_barrier_hypercube_top failed %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = barrier_hypercube_bottom(request);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: ompi_coll_portals4_barrier_hypercube_bottom failed %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
    return OMPI_SUCCESS;
}

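/*
 * Non-blocking barrier: the request is handed back to the caller
 * immediately; barrier_hypercube_top only posts the triggered operations,
 * and cleanup/completion are deferred to
 * ompi_coll_portals4_ibarrier_intra_fini.
 */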
int
ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
        ompi_request_t **ompi_req,
        struct mca_coll_base_module_2_3_0_t *module)
{
    int ret;
    mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module;
    ompi_coll_portals4_request_t *request;


    OMPI_COLL_PORTALS4_REQUEST_ALLOC(comm, request);
    if (NULL == request) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: request alloc failed\n",
                __FILE__, __LINE__);
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    }

    *ompi_req = &request->super;
    request->is_sync = false;

    ret = barrier_hypercube_top(comm, request, portals4_module);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: ompi_coll_portals4_barrier_hypercube_top failed %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}

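/*
 * Completion handler for the non-blocking barrier, run once the final
 * self-directed put has arrived on the finish portal table entry
 * (presumably dispatched from the component's event handling): it frees
 * the Portals4 resources and completes the MPI request.
 */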
int
ompi_coll_portals4_ibarrier_intra_fini(ompi_coll_portals4_request_t *request)
{
    int ret;

    ret = barrier_hypercube_bottom(request);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: ompi_coll_portals4_barrier_hypercube_bottom failed %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ompi_request_complete(&request->super, true);

    return OMPI_SUCCESS;
}
