root/opal/mca/btl/uct/btl_uct_rdma.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_btl_uct_uct_completion
  2. mca_btl_uct_uct_completion_construct
  3. mca_btl_uct_uct_completion_alloc
  4. mca_btl_uct_uct_completion_release
  5. mca_btl_uct_get_unpack
  6. mca_btl_uct_get
  7. mca_btl_uct_put_pack
  8. mca_btl_uct_put
  9. mca_btl_uct_flush
  10. mca_btl_uct_flush_thread

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
   4  *                         reserved.
   5  * $COPYRIGHT$
   6  *
   7  * Additional copyrights may follow
   8  *
   9  * $HEADER$
  10  */
  11 
  12 #include "btl_uct_device_context.h"
  13 
  14 void mca_btl_uct_uct_completion (uct_completion_t *uct_comp, ucs_status_t status)
  15 {
  16     mca_btl_uct_uct_completion_t *comp = (mca_btl_uct_uct_completion_t *) ((uintptr_t) uct_comp - offsetof (mca_btl_uct_uct_completion_t, uct_comp));
  17 
  18     BTL_VERBOSE(("network operation complete. status = %d", status));
  19 
  20     comp->status = status;
  21     opal_fifo_push (&comp->dev_context->completion_fifo, &comp->super.super);
  22 }
  23 
  24 
  25 static void mca_btl_uct_uct_completion_construct (mca_btl_uct_uct_completion_t *comp)
  26 {
  27     comp->frag = NULL;
  28     comp->uct_comp.func = mca_btl_uct_uct_completion;
  29 }
  30 
  31 OBJ_CLASS_INSTANCE(mca_btl_uct_uct_completion_t, opal_free_list_item_t, mca_btl_uct_uct_completion_construct, NULL);
  32 
  33 
  34 mca_btl_uct_uct_completion_t *
  35 mca_btl_uct_uct_completion_alloc (mca_btl_uct_module_t *uct_btl, mca_btl_base_endpoint_t *endpoint,
  36                                   void *local_address, mca_btl_base_registration_handle_t *local_handle,
  37                                   mca_btl_uct_device_context_t *dev_context, mca_btl_base_rdma_completion_fn_t cbfunc,
  38                                   void *cbcontext, void *cbdata)
  39 {
  40     mca_btl_uct_uct_completion_t *comp = (mca_btl_uct_uct_completion_t *) opal_free_list_get (&dev_context->rdma_completions);
  41     if (OPAL_LIKELY(NULL != comp)) {
  42         comp->uct_comp.count = 1;
  43         comp->btl = &uct_btl->super;
  44         comp->endpoint = endpoint;
  45         comp->local_address = local_address;
  46         comp->local_handle = local_handle;
  47         comp->cbfunc = cbfunc;
  48         comp->cbcontext = cbcontext;
  49         comp->cbdata = cbdata;
  50         comp->dev_context = dev_context;
  51     }
  52 
  53     return comp;
  54 }
  55 
  56 void mca_btl_uct_uct_completion_release (mca_btl_uct_uct_completion_t *comp)
  57 {
  58     if (comp) {
  59         opal_free_list_return (&comp->dev_context->rdma_completions, &comp->super);
  60     }
  61 }
  62 
  63 static void mca_btl_uct_get_unpack (void *arg, const void *data, size_t length)
  64 {
  65     memcpy (arg, data, length);
  66 }
  67 
  68 int mca_btl_uct_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
  69                       uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
  70                       mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
  71                       int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
  72 {
  73     mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
  74     mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_rdma_context (uct_btl);
  75     mca_btl_uct_uct_completion_t *comp = NULL;
  76     ucs_status_t ucs_status;
  77     uct_rkey_bundle_t rkey;
  78     uct_ep_h ep_handle;
  79     int rc;
  80 
  81     BTL_VERBOSE(("performing get operation. local address: %p, length: %lu", local_address, (unsigned long) size));
  82 
  83     if (cbfunc) {
  84         comp = mca_btl_uct_uct_completion_alloc (uct_btl, endpoint, local_address, local_handle, context,
  85                                                  cbfunc, cbcontext, cbdata);
  86         if (OPAL_UNLIKELY(NULL == comp)) {
  87             BTL_VERBOSE(("culd not allocate completion structure"));
  88             return OPAL_ERR_OUT_OF_RESOURCE;
  89         }
  90     }
  91 
  92     rc = mca_btl_uct_get_rkey (uct_btl, context, endpoint, remote_handle, &rkey, &ep_handle);
  93     if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
  94         BTL_VERBOSE(("mca_btl_uct_get_rkey returned %d", rc));
  95         mca_btl_uct_uct_completion_release (comp);
  96         return rc;
  97     }
  98 
  99     mca_btl_uct_context_lock (context);
 100 
 101     if (size <= MCA_BTL_UCT_TL_ATTR(uct_btl->rdma_tl, context->context_id).cap.get.max_bcopy) {
 102         ucs_status = uct_ep_get_bcopy (ep_handle, mca_btl_uct_get_unpack, local_address, size, remote_address,
 103                                        rkey.rkey, &comp->uct_comp);
 104     } else {
 105         uct_iov_t iov = {.buffer = local_address, .length = size, .stride = 0, .count = 1,
 106                          .memh = MCA_BTL_UCT_REG_REMOTE_TO_LOCAL(local_handle)->uct_memh};
 107         ucs_status = uct_ep_get_zcopy (ep_handle, &iov, 1, remote_address, rkey.rkey, &comp->uct_comp);
 108     }
 109 
 110     /* go ahead and progress the worker while we have the lock (if we are not in an AM callback) */
 111     if (!context->in_am_callback) {
 112         (void) uct_worker_progress (context->uct_worker);
 113     }
 114 
 115     mca_btl_uct_context_unlock (context);
 116 
 117     if (!context->in_am_callback) {
 118         mca_btl_uct_device_handle_completions (context);
 119     }
 120 
 121     if (UCS_OK == ucs_status && cbfunc) {
 122         /* if UCS_OK is returned the callback will never fire so we have to make the callback
 123          * ourselves */
 124         cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
 125     }
 126 
 127     if (UCS_INPROGRESS == ucs_status) {
 128         ucs_status = UCS_OK;
 129     } else {
 130         mca_btl_uct_uct_completion_release (comp);
 131     }
 132 
 133     BTL_VERBOSE(("get issued. status = %d", ucs_status));
 134 
 135     uct_rkey_release (&rkey);
 136 
 137     return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
 138 }
 139 
 140 struct mca_btl_uct_put_pack_args_t {
 141     void *local_address;
 142     size_t size;
 143 };
 144 
 145 typedef struct mca_btl_uct_put_pack_args_t mca_btl_uct_put_pack_args_t;
 146 
 147 static size_t mca_btl_uct_put_pack (void *dest, void *arg)
 148 {
 149     mca_btl_uct_put_pack_args_t *args = (mca_btl_uct_put_pack_args_t *) arg;
 150 
 151     memcpy (dest, args->local_address, args->size);
 152     return args->size;
 153 }
 154 
 155 int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
 156                       uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
 157                       mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
 158                       int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
 159 {
 160     mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
 161     mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_rdma_context (uct_btl);
 162     mca_btl_uct_uct_completion_t *comp = NULL;
 163     ucs_status_t ucs_status;
 164     uct_rkey_bundle_t rkey;
 165     uct_ep_h ep_handle;
 166     bool use_short = false;
 167     bool use_bcopy = false;
 168     int rc;
 169 
 170     BTL_VERBOSE(("performing put operation. local address: %p, length: %lu", local_address, (unsigned long) size));
 171 
 172     if (size > uct_btl->super.btl_put_local_registration_threshold && cbfunc) {
 173         comp = mca_btl_uct_uct_completion_alloc (uct_btl, endpoint, local_address, local_handle, context,
 174                                                  cbfunc, cbcontext, cbdata);
 175         if (OPAL_UNLIKELY(NULL == comp)) {
 176             return OPAL_ERR_OUT_OF_RESOURCE;
 177         }
 178     }
 179 
 180     rc = mca_btl_uct_get_rkey (uct_btl, context, endpoint, remote_handle, &rkey, &ep_handle);
 181     if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
 182         mca_btl_uct_uct_completion_release (comp);
 183         return rc;
 184     }
 185 
 186     mca_btl_uct_context_lock (context);
 187 
 188     /* determine what UCT prototol should be used */
 189     if (size <= uct_btl->super.btl_put_local_registration_threshold) {
 190         use_short = size <= MCA_BTL_UCT_TL_ATTR(uct_btl->rdma_tl, context->context_id).cap.put.max_short;
 191         use_bcopy = !use_short;
 192     }
 193 
 194     do {
 195         if (use_short) {
 196             ucs_status = uct_ep_put_short (ep_handle, local_address, size, remote_address, rkey.rkey);
 197         } else if (use_bcopy) {
 198             ssize_t tmp = uct_ep_put_bcopy (ep_handle, mca_btl_uct_put_pack,
 199                                             &(mca_btl_uct_put_pack_args_t) {.local_address = local_address,
 200                                                     .size = size},
 201                                             remote_address, rkey.rkey);
 202             ucs_status = (tmp == (ssize_t) size) ? UCS_OK : UCS_ERR_NO_RESOURCE;
 203         } else {
 204             uct_iov_t iov = {.buffer = local_address, .length = size, .stride = 0, .count = 1,
 205                          .memh = MCA_BTL_UCT_REG_REMOTE_TO_LOCAL(local_handle)->uct_memh};
 206 
 207             ucs_status = uct_ep_put_zcopy (ep_handle, &iov, 1, remote_address, rkey.rkey, &comp->uct_comp);
 208         }
 209 
 210         /* go ahead and progress the worker while we have the lock */
 211         if (UCS_ERR_NO_RESOURCE != ucs_status || context->in_am_callback) {
 212             if (!context->in_am_callback) {
 213                 (void) uct_worker_progress (context->uct_worker);
 214             }
 215 
 216             break;
 217         }
 218 
 219         /* wait for something to complete */
 220         while (!uct_worker_progress (context->uct_worker));
 221     } while (1);
 222 
 223     mca_btl_uct_context_unlock (context);
 224 
 225     mca_btl_uct_device_handle_completions (context);
 226 
 227     if (UCS_OK == ucs_status && cbfunc) {
 228         /* if UCS_OK is returned the callback will never fire so we have to make the callback
 229          * ourselves. this callback is possibly being made before the data is visible to the
 230          * remote process. */
 231         cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
 232     }
 233 
 234     if (UCS_INPROGRESS == ucs_status) {
 235         ucs_status = UCS_OK;
 236     } else {
 237         mca_btl_uct_uct_completion_release (comp);
 238     }
 239 
 240     uct_rkey_release (&rkey);
 241 
 242     return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
 243 }
 244 
 245 int mca_btl_uct_flush (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint)
 246 {
 247     mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
 248     const int tl_index = uct_btl->rdma_tl->tl_index;
 249     const int context_count = mca_btl_uct_component.num_contexts_per_module;
 250     ucs_status_t ucs_status;
 251 
 252     BTL_VERBOSE(("mca_btl_uct_flush starting"));
 253 
 254     for (int i = 0 ; i < context_count ; ++i) {
 255         mca_btl_uct_device_context_t *context = uct_btl->rdma_tl->uct_dev_contexts[i];
 256 
 257         if (NULL == context) {
 258             continue;
 259         }
 260 
 261         mca_btl_uct_context_lock (context);
 262         /* this loop is here because at least some of the TLs do no support a
 263          * completion callback. its a real PIA but has to be done for now. */
 264         do {
 265             uct_worker_progress (context->uct_worker);
 266 
 267             if (NULL != endpoint && endpoint->uct_eps[context->context_id][tl_index].uct_ep) {
 268                 ucs_status = uct_ep_flush (endpoint->uct_eps[context->context_id][tl_index].uct_ep, 0, NULL);
 269             } else {
 270                 ucs_status = uct_iface_flush (context->uct_iface, 0, NULL);
 271             }
 272         } while (UCS_INPROGRESS == ucs_status);
 273 
 274         mca_btl_uct_context_unlock (context);
 275         mca_btl_uct_device_handle_completions (context);
 276     }
 277 
 278     return OPAL_SUCCESS;
 279 }
 280 
 281 int mca_btl_uct_flush_thread (mca_btl_base_module_t *btl)
 282 {
 283     mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
 284     const int context_id = mca_btl_uct_get_context_index ();
 285     mca_btl_uct_device_context_t *context = uct_btl->rdma_tl->uct_dev_contexts[context_id];
 286     ucs_status_t ucs_status;
 287 
 288     BTL_VERBOSE(("mca_btl_uct_flush_thread starting"));
 289 
 290     if (NULL == context) {
 291         return OPAL_SUCCESS;
 292     }
 293 
 294     mca_btl_uct_context_lock (context);
 295 
 296     /* this loop is here because at least some of the TLs do no support a
 297      * completion callback. its a real PIA but has to be done for now. */
 298     do {
 299         uct_worker_progress (context->uct_worker);
 300         ucs_status = uct_iface_flush (context->uct_iface, 0, NULL);
 301     } while (UCS_INPROGRESS == ucs_status);
 302 
 303     mca_btl_uct_context_unlock (context);
 304 
 305     mca_btl_uct_device_handle_completions (context);
 306 
 307     return OPAL_SUCCESS;
 308 }

/* [<][>][^][v][top][bottom][index][help] */