root/opal/mca/btl/ugni/btl_ugni_endpoint.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_btl_ugni_ep_construct
  2. mca_btl_ugni_ep_destruct
  3. mca_btl_ugni_endpoint_get_modex
  4. mca_btl_ugni_init_ep
  5. mca_btl_ugni_release_ep
  6. mca_btl_ugni_ep_smsg_get_mbox
  7. mca_btl_ugni_ep_send_disconnect
  8. mca_btl_ugni_ep_disconnect
  9. mca_btl_ugni_ep_connect_start
  10. mca_btl_ugni_ep_connect_finish
  11. mca_btl_ugni_directed_ep_post
  12. mca_btl_ugni_wildcard_ep_post
  13. mca_btl_ugni_ep_connect_progress
  14. mca_btl_ugni_ep_handle_init
  15. mca_btl_ugni_ep_handle_cleanup

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
   4  *                         reserved.
   5  * Copyright (c) 2011-2013 UT-Battelle, LLC. All rights reserved.
   6  * Copyright (c) 2017      Intel, Inc.  All rights reserved.
   7  * $COPYRIGHT$
   8  *
   9  * Additional copyrights may follow
  10  *
  11  * $HEADER$
  12  */
  13 
  14 #include "btl_ugni_endpoint.h"
  15 #include "btl_ugni_smsg.h"
  16 #include "opal/mca/pmix/pmix.h"
  17 
  18 static void mca_btl_ugni_ep_construct (mca_btl_base_endpoint_t *ep)
  19 {
  20     memset ((char *) ep + sizeof(ep->super), 0, sizeof (*ep) - sizeof (ep->super));
  21     OBJ_CONSTRUCT(&ep->frag_wait_list, opal_list_t);
  22     OBJ_CONSTRUCT(&ep->lock, opal_recursive_mutex_t);
  23 }
  24 
  25 static void mca_btl_ugni_ep_destruct (mca_btl_base_endpoint_t *ep)
  26 {
  27     OBJ_DESTRUCT(&ep->frag_wait_list);
  28     OBJ_DESTRUCT(&ep->lock);
  29     free (ep->remote_attr);
  30 }
  31 
/* Define the mca_btl_ugni_endpoint_t class: its parent class is
 * opal_list_item_t, and mca_btl_ugni_ep_construct / mca_btl_ugni_ep_destruct
 * are registered as its constructor and destructor. */
OBJ_CLASS_INSTANCE(mca_btl_ugni_endpoint_t, opal_list_item_t,
                   mca_btl_ugni_ep_construct, mca_btl_ugni_ep_destruct);
  34 
  35 static int mca_btl_ugni_endpoint_get_modex (mca_btl_base_endpoint_t *ep)
  36 {
  37     mca_btl_ugni_modex_t *modex;
  38     size_t msg_size;
  39     int rc;
  40 
  41     assert (NULL != ep && NULL != ep->peer_proc);
  42 
  43     /* Receive the modex */
  44     OPAL_MODEX_RECV(rc, &mca_btl_ugni_component.super.btl_version,
  45                     &ep->peer_proc->proc_name, (void **)&modex, &msg_size);
  46     if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
  47         BTL_ERROR(("error receiving modex"));
  48         return rc;
  49     }
  50 
  51     ep->ep_rem_addr = modex->addr;
  52     ep->ep_rem_id = modex->id;
  53 
  54 
  55     BTL_VERBOSE(("received modex for ep %p. addr: %d, id: %d",  (void*)ep, ep->ep_rem_addr, ep->ep_rem_id));
  56 
  57     free (modex);
  58 
  59     return OPAL_SUCCESS;
  60 }
  61 
  62 int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_endpoint_t **ep,
  63                           mca_btl_ugni_module_t *btl, opal_proc_t *peer_proc)
  64 {
  65     mca_btl_ugni_endpoint_t *endpoint;
  66     int rc;
  67 
  68     endpoint = OBJ_NEW(mca_btl_ugni_endpoint_t);
  69     assert (endpoint != NULL);
  70 
  71     endpoint->smsg_progressing = 0;
  72     endpoint->state = MCA_BTL_UGNI_EP_STATE_INIT;
  73     endpoint->peer_proc = peer_proc;
  74 
  75     /* get the modex info for this endpoint and setup a ugni endpoint. this call may lead
  76      * to re-entry through opal_progress(). */
  77     rc = mca_btl_ugni_endpoint_get_modex (endpoint);
  78     if (OPAL_SUCCESS != rc) {
  79         assert (0);
  80         return rc;
  81     }
  82 
  83     /* add this endpoint to the pointer array */
  84     endpoint->index = opal_pointer_array_add (&ugni_module->endpoints, endpoint);
  85 
  86     *ep = endpoint;
  87 
  88     return OPAL_SUCCESS;
  89 }
  90 
  91 void mca_btl_ugni_release_ep (mca_btl_ugni_endpoint_t *ep)
  92 {
  93     mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
  94     int rc;
  95 
  96     opal_mutex_lock (&ep->lock);
  97 
  98     rc = mca_btl_ugni_ep_disconnect (ep, false);
  99     if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
 100         BTL_VERBOSE(("btl/ugni error disconnecting endpoint"));
 101     }
 102 
 103     /* TODO -- Clear space at the end of the endpoint array */
 104     opal_pointer_array_set_item (&ugni_module->endpoints, ep->index, NULL);
 105 
 106     opal_mutex_unlock (&ep->lock);
 107 
 108     OBJ_RELEASE(ep);
 109 }
 110 
 111 static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep) {
 112     mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
 113     opal_free_list_item_t *mbox;
 114 
 115     assert (NULL == ep->mailbox);
 116 
 117     mbox = opal_free_list_get (&ugni_module->smsg_mboxes);
 118     if (OPAL_UNLIKELY(NULL == mbox)) {
 119         return OPAL_ERR_OUT_OF_RESOURCE;
 120     }
 121 
 122     ep->mailbox = (mca_btl_ugni_smsg_mbox_t *) mbox;
 123     ep->mailbox->attr.index = ep->index;
 124 
 125     /* per ugni spec we need to zero mailbox data before connecting */
 126     memset ((char *)ep->mailbox->attr.smsg_attr.msg_buffer + ep->mailbox->attr.smsg_attr.mbox_offset, 0,
 127             ep->mailbox->attr.smsg_attr.buff_size);
 128     return OPAL_SUCCESS;
 129 }
 130 
 131 static int mca_btl_ugni_ep_send_disconnect (mca_btl_base_endpoint_t *ep)
 132 {
 133     int rc;
 134 
 135     do {
 136         rc = mca_btl_ugni_endpoint_smsg_send_wtag (ep, NULL, 0, NULL, 0, -1, MCA_BTL_UGNI_TAG_DISCONNECT);
 137         if (OPAL_LIKELY(GNI_RC_NOT_DONE != rc)) {
 138             break;
 139         }
 140 
 141         /* most likely got here because we are out of credits. check the remote CQ to get credit return */
 142         (void) mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_ep_btl (ep));
 143     } while (1);
 144 
 145     return mca_btl_rc_ugni_to_opal (rc);
 146 }
 147 
 148 int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect)
 149 {
 150     mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
 151     mca_btl_ugni_device_t *device;
 152     int rc;
 153 
 154     if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
 155         /* nothing to do */
 156         return OPAL_SUCCESS;
 157     }
 158 
 159     device = ep->smsg_ep_handle.device;
 160 
 161     while (device->dev_smsg_local_cq.active_operations) {
 162         /* ensure all sends are complete before removing and procs */
 163         rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
 164         if (OPAL_SUCCESS != rc) {
 165             break;
 166         }
 167     }
 168 
 169     if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
 170         rc = mca_btl_ugni_ep_send_disconnect (ep);
 171         if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
 172             BTL_VERBOSE(("could not send disconnect message to peer"));
 173         }
 174 
 175         /* wait for the disconnect messagse to go */
 176         do {
 177             /* ensure all sends are complete before removing and procs */
 178             rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
 179             if (OPAL_SUCCESS != rc) {
 180                 break;
 181             }
 182         } while (device->dev_smsg_local_cq.active_operations);
 183 
 184         (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle.device->smsg_connections, -1);
 185     }
 186 
 187     mca_btl_ugni_device_lock (device);
 188 
 189     /* NTH: this call may not need the device lock. seems to work without it but
 190      * the lock is here to be safe. */
 191     (void) mca_btl_ugni_ep_handle_cleanup (&ep->smsg_ep_handle);
 192 
 193     mca_btl_ugni_device_unlock (device);
 194 
 195     if (ep->mailbox) {
 196         opal_free_list_return (&ugni_module->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
 197         ep->mailbox = NULL;
 198     }
 199 
 200     ep->state = MCA_BTL_UGNI_EP_STATE_INIT;
 201 
 202     return OPAL_SUCCESS;
 203 }
 204 
 205 static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
 206     mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
 207     mca_btl_ugni_device_t *device = ugni_module->devices;
 208     int rc;
 209 
 210     /* protect against re-entry from opal_progress */
 211     if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_CONNECTING == ep->state)) {
 212         return OPAL_ERR_RESOURCE_BUSY;
 213     }
 214 
 215     ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTING;
 216 
 217     BTL_VERBOSE(("initiating connection to remote peer with address: %u id: %u proc: %p",
 218                  ep->ep_rem_addr, ep->ep_rem_id, (void *)ep->peer_proc));
 219 
 220     /* bind endpoint to remote address */
 221     /* we bind two endpoints to seperate out local smsg completion and local fma completion */
 222     mca_btl_ugni_device_lock (device);
 223     rc = mca_btl_ugni_ep_handle_init (ep, device->dev_smsg_local_cq.gni_handle, device, &ep->smsg_ep_handle);
 224     mca_btl_ugni_device_unlock (device);
 225     if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
 226         return rc;
 227     }
 228 
 229     /* build connection data */
 230     rc = mca_btl_ugni_ep_smsg_get_mbox (ep);
 231     if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
 232         return rc;
 233     }
 234 
 235     ep->remote_attr = calloc (1, sizeof (*ep->remote_attr));
 236     if (OPAL_UNLIKELY(NULL == ep->remote_attr)) {
 237         return OPAL_ERR_OUT_OF_RESOURCE;
 238     }
 239 
 240     BTL_VERBOSE(("btl/ugni connection to remote peer initiated"));
 241 
 242     return OPAL_SUCCESS;
 243 }
 244 
 245 static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
 246     mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
 247     gni_return_t grc;
 248     int rc;
 249 
 250     BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
 251                  "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
 252                  "msg_maxsize = %d", ep->remote_attr->smsg_attr.msg_type, ep->remote_attr->smsg_attr.msg_buffer,
 253                  ep->remote_attr->smsg_attr.buff_size, ep->remote_attr->smsg_attr.mem_hndl.qword1,
 254                  ep->remote_attr->smsg_attr.mem_hndl.qword2, ep->remote_attr->smsg_attr.mbox_offset,
 255                  ep->remote_attr->smsg_attr.mbox_maxcredit, ep->remote_attr->smsg_attr.msg_maxsize));
 256 
 257     BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
 258                  "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
 259                  "msg_maxsize = %d", ep->mailbox->attr.smsg_attr.msg_type, ep->mailbox->attr.smsg_attr.msg_buffer,
 260                  ep->mailbox->attr.smsg_attr.buff_size, ep->mailbox->attr.smsg_attr.mem_hndl.qword1,
 261                  ep->mailbox->attr.smsg_attr.mem_hndl.qword2, ep->mailbox->attr.smsg_attr.mbox_offset,
 262                  ep->mailbox->attr.smsg_attr.mbox_maxcredit, ep->mailbox->attr.smsg_attr.msg_maxsize));
 263 
 264     grc = GNI_SmsgInit (ep->smsg_ep_handle.gni_handle, &ep->mailbox->attr.smsg_attr,
 265                         &ep->remote_attr->smsg_attr);
 266     if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
 267         BTL_ERROR(("error initializing SMSG protocol. rc = %d", grc));
 268 
 269         return mca_btl_rc_ugni_to_opal (grc);
 270     }
 271 
 272     /* set the local event data to the local index and the remote event data to my
 273      * index on the remote peer. This makes lookup of endpoints on completion take
 274      * a single lookup in the endpoints array. we will not be able to change the
 275      * remote peer's index in the endpoint's array after this point. */
 276     GNI_EpSetEventData (ep->smsg_ep_handle.gni_handle, ep->index, ep->remote_attr->index);
 277 
 278     ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl;
 279     ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
 280     (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle.device->smsg_connections, 1);
 281 
 282     /* send all pending messages */
 283     BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));
 284 
 285     rc = mca_btl_ugni_progress_send_wait_list (ep);
 286     if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
 287         OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
 288         if (false == ep->wait_listed) {
 289             opal_list_append (&ugni_module->ep_wait_list, &ep->super);
 290             ep->wait_listed = true;
 291         }
 292         OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
 293     }
 294 
 295     free (ep->remote_attr);
 296     ep->remote_attr = NULL;
 297 
 298     return OPAL_SUCCESS;
 299 }
 300 
 301 static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep)
 302 {
 303     mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
 304     gni_return_t rc;
 305 
 306     BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->ep_rem_id, (void *)ep));
 307     /* the irq cq is associated with only the first device */
 308     ep->mailbox->attr.rmt_irq_mem_hndl = ugni_module->devices->smsg_irq_mhndl;
 309 
 310     rc = GNI_EpPostDataWId (ep->smsg_ep_handle.gni_handle, &ep->mailbox->attr, sizeof (ep->mailbox->attr),
 311                             ep->remote_attr, sizeof (*ep->remote_attr),
 312                             MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);
 313     if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) {
 314         (void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, 1);
 315     }
 316 
 317     return mca_btl_rc_ugni_to_opal (rc);
 318 }
 319 
 320 int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module)
 321 {
 322     gni_return_t rc;
 323 
 324     BTL_VERBOSE(("posting wildcard datagram"));
 325 
 326     memset (&ugni_module->wc_local_attr, 0, sizeof (ugni_module->wc_local_attr));
 327     memset (&ugni_module->wc_remote_attr, 0, sizeof (ugni_module->wc_remote_attr));
 328     rc = GNI_EpPostDataWId (ugni_module->wildcard_ep, &ugni_module->wc_local_attr,
 329                             sizeof (ugni_module->wc_local_attr), &ugni_module->wc_remote_attr,
 330                             sizeof (ugni_module->wc_remote_attr), MCA_BTL_UGNI_CONNECT_WILDCARD_ID);
 331 
 332     return mca_btl_rc_ugni_to_opal (rc);
 333 }
 334 
 335 
 336 int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep)
 337 {
 338     int rc;
 339 
 340     BTL_VERBOSE(("progressing connection for endpoint %p with state %d", (void *)ep, ep->state));
 341 
 342     if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
 343         return OPAL_SUCCESS;
 344     }
 345 
 346     if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
 347         rc = mca_btl_ugni_ep_connect_start (ep);
 348         if (OPAL_SUCCESS != rc) {
 349             return rc;
 350         }
 351     }
 352 
 353     BTL_VERBOSE(("ep->remote_attr->smsg_attr = {.msg_type = %d, .msg_buffer = %p}", ep->remote_attr->smsg_attr.msg_type,
 354                  (void*)ep->remote_attr->smsg_attr.msg_buffer));
 355 
 356     if (GNI_SMSG_TYPE_INVALID == ep->remote_attr->smsg_attr.msg_type) {
 357         /* use datagram to exchange connection information with the remote peer */
 358         if (!ep->dg_posted) {
 359             rc = mca_btl_ugni_directed_ep_post (ep);
 360             if (OPAL_SUCCESS == rc) {
 361                 ep->dg_posted = true;
 362                 rc = OPAL_ERR_RESOURCE_BUSY;
 363             }
 364 
 365             return rc;
 366         }
 367 
 368         return OPAL_SUCCESS;
 369     }
 370 
 371     return mca_btl_ugni_ep_connect_finish (ep);
 372 }
 373 
 374 int mca_btl_ugni_ep_handle_init (mca_btl_ugni_endpoint_t *ep, gni_cq_handle_t cq,
 375                                  mca_btl_ugni_device_t *device, mca_btl_ugni_endpoint_handle_t *ep_handle)
 376 {
 377     gni_return_t grc;
 378 
 379     ep_handle->device = device;
 380 
 381     /* create a uGNI endpoint handle and bind it to the remote peer */
 382     grc = GNI_EpCreate (device->dev_handle, cq, &ep_handle->gni_handle);
 383     if (OPAL_LIKELY(GNI_RC_SUCCESS == grc)) {
 384         grc = GNI_EpBind (ep_handle->gni_handle, ep->ep_rem_addr, ep->ep_rem_id);
 385     }
 386 
 387     return mca_btl_rc_ugni_to_opal (grc);
 388 }
 389 
 390 int mca_btl_ugni_ep_handle_cleanup (mca_btl_ugni_endpoint_handle_t *ep_handle)
 391 {
 392     int rc;
 393 
 394     if (0 == ep_handle->gni_handle) {
 395         return OPAL_SUCCESS;
 396     }
 397 
 398     /* TODO: need to fix, may be outstanding tx's, etc. */
 399     rc = GNI_EpUnbind (ep_handle->gni_handle);
 400     if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
 401         /* should warn */
 402     } else {
 403         (void) GNI_EpDestroy (ep_handle->gni_handle);
 404     }
 405 
 406     ep_handle->gni_handle = 0;
 407 
 408     return OPAL_SUCCESS;
 409 }

/* [<][>][^][v][top][bottom][index][help] */