root/opal/mca/btl/uct/btl_uct_am.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_btl_uct_alloc
  2. _mca_btl_uct_send_pack
  3. mca_btl_uct_prepare_src
  4. mca_btl_uct_free
  5. mca_btl_uct_send_frag_pack
  6. mca_btl_uct_append_pending_frag
  7. mca_btl_uct_send_frag
  8. mca_btl_uct_send
  9. mca_btl_uct_sendi_pack
  10. mca_btl_uct_max_sendi
  11. mca_btl_uct_sendi

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2018      Los Alamos National Security, LLC. All rights
   4  *                         reserved.
   5  * $COPYRIGHT$
   6  *
   7  * Additional copyrights may follow
   8  *
   9  * $HEADER$
  10  */
  11 
  12 #include "btl_uct_am.h"
  13 #include "btl_uct_rdma.h"
  14 #include "btl_uct_device_context.h"
  15 
  16 /**
  17  * Allocate a segment.
  18  *
  19  * @param btl (IN)      BTL module
  20  * @param size (IN)     Request segment size.
  21  */
  22 mca_btl_base_descriptor_t *mca_btl_uct_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
  23                                               uint8_t order, size_t size, uint32_t flags)
  24 {
  25     mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
  26     mca_btl_uct_base_frag_t *frag = NULL;
  27 
  28     if (size <= (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) {
  29         frag = mca_btl_uct_frag_alloc_short (uct_btl, endpoint);
  30     } else if (size <= uct_btl->super.btl_eager_limit) {
  31         frag = mca_btl_uct_frag_alloc_eager (uct_btl, endpoint);
  32     } else {
  33         frag = mca_btl_uct_frag_alloc_max (uct_btl, endpoint);
  34     }
  35 
  36     if (OPAL_LIKELY(frag != NULL)) {
  37         frag->segments[0].seg_len  = size;
  38 
  39         frag->base.des_segment_count = 1;
  40         frag->base.des_flags   = flags;
  41         frag->base.order       = order;
  42         frag->uct_iov.length = size;
  43         if (NULL != frag->base.super.registration) {
  44             /* zero-copy fragments will need callbacks */
  45             frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
  46         }
  47     }
  48 
  49     return (mca_btl_base_descriptor_t *) frag;
  50 }
  51 
  52 static inline void _mca_btl_uct_send_pack (void *data, void *header, size_t header_size, opal_convertor_t *convertor,
  53                                            size_t payload_size)
  54 {
  55     uint32_t iov_count = 1;
  56     struct iovec iov;
  57     size_t length;
  58 
  59     if (header_size > 0) {
  60         assert (NULL != header);
  61         memcpy (data, header, header_size);
  62     }
  63 
  64     /* pack the data into the supplied buffer */
  65     iov.iov_base = (IOVBASE_TYPE *) ((intptr_t) data + header_size);
  66     iov.iov_len  = length = payload_size;
  67 
  68     (void) opal_convertor_pack (convertor, &iov, &iov_count, &length);
  69 
  70     assert (length == payload_size);
  71 }
  72 
  73 struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src (mca_btl_base_module_t *btl,
  74                                                            mca_btl_base_endpoint_t *endpoint,
  75                                                            opal_convertor_t *convertor,
  76                                                            uint8_t order, size_t reserve,
  77                                                            size_t *size, uint32_t flags)
  78 {
  79     mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
  80     const size_t total_size = reserve + *size;
  81     mca_btl_uct_base_frag_t *frag;
  82     void *data_ptr;
  83 
  84     /* in place send fragment */
  85     if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor) || total_size > uct_btl->super.btl_eager_limit)) {
  86         frag = (mca_btl_uct_base_frag_t *) mca_btl_uct_alloc (btl, endpoint, order, total_size, flags);
  87         if (OPAL_UNLIKELY(NULL == frag)) {
  88             return NULL;
  89         }
  90 
  91         _mca_btl_uct_send_pack ((void *) ((intptr_t) frag->uct_iov.buffer + reserve), NULL, 0,
  92                                 convertor, *size);
  93     } else {
  94         opal_convertor_get_current_pointer (convertor, &data_ptr);
  95         assert (NULL != data_ptr);
  96 
  97         frag = mca_btl_uct_frag_alloc_short (uct_btl, endpoint);
  98         if (OPAL_UNLIKELY(NULL == frag)) {
  99             return NULL;
 100         }
 101 
 102         frag->uct_iov.length   = total_size;
 103         frag->base.order       = order;
 104         frag->base.des_flags   = flags;
 105         if (total_size > (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) {
 106             frag->segments[0].seg_len = reserve;
 107             frag->segments[1].seg_len = *size;
 108             frag->segments[1].seg_addr.pval = data_ptr;
 109             frag->base.des_segment_count = 2;
 110         } else {
 111             frag->segments[0].seg_len = total_size;
 112             memcpy ((void *)((intptr_t) frag->segments[1].seg_addr.pval + reserve), data_ptr, *size);
 113             frag->base.des_segment_count = 1;
 114         }
 115     }
 116 
 117     return &frag->base;
 118 }
 119 
 120 /**
 121  * Return a segment allocated by this BTL.
 122  *
 123  * @param btl (IN)      BTL module
 124  * @param segment (IN)  Allocated segment.
 125  */
 126 int mca_btl_uct_free (mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des)
 127 {
 128     mca_btl_uct_frag_return ((mca_btl_uct_base_frag_t *) des);
 129     return OPAL_SUCCESS;
 130 }
 131 
 132 static size_t mca_btl_uct_send_frag_pack (void *data, void *arg)
 133 {
 134     mca_btl_uct_base_frag_t *frag = (mca_btl_uct_base_frag_t *) arg;
 135     size_t length = 8;
 136 
 137     memcpy (data, &frag->header, sizeof (frag->header));
 138     data = (void *)((intptr_t) data + 8);
 139 
 140     /* this function should only ever get called with fragments with two segments */
 141     for (size_t i = 0 ; i < frag->base.des_segment_count ; ++i) {
 142         const size_t seg_len = frag->segments[i].seg_len;
 143         memcpy (data, frag->segments[i].seg_addr.pval, seg_len);
 144         data = (void *)((intptr_t) data + seg_len);
 145         length += seg_len;
 146     }
 147 
 148     return length;
 149 }
 150 
 151 static void mca_btl_uct_append_pending_frag (mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_t *frag,
 152                                              mca_btl_uct_device_context_t *context, bool ready)
 153 {
 154     frag->ready = ready;
 155     frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
 156     opal_atomic_wmb ();
 157 
 158     opal_list_append (&uct_btl->pending_frags, (opal_list_item_t *) frag);
 159 }
 160 
 161 int mca_btl_uct_send_frag (mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_t *frag, bool append)
 162 {
 163     mca_btl_uct_device_context_t *context = frag->context;
 164     const ssize_t msg_size = frag->uct_iov.length + 8;
 165     ssize_t size;
 166     ucs_status_t ucs_status;
 167     uct_ep_h ep_handle = NULL;
 168 
 169     /* if we get here then we must have an endpoint handle for this context/endpoint pair */
 170     (void) mca_btl_uct_endpoint_test_am (uct_btl, frag->endpoint, frag->context, &ep_handle);
 171     assert (NULL != ep_handle);
 172 
 173     /* if another thread set this we really don't care too much as this flag is only meant
 174      * to protect against deep recursion */
 175     if (!context->in_am_callback) {
 176         mca_btl_uct_context_lock (context);
 177         /* attempt to post the fragment */
 178         if (NULL != frag->base.super.registration) {
 179             frag->comp.dev_context = context;
 180             ucs_status = uct_ep_am_zcopy (ep_handle, MCA_BTL_UCT_FRAG, &frag->header, sizeof (frag->header),
 181                                           &frag->uct_iov, 1, 0, &frag->comp.uct_comp);
 182 
 183             if (OPAL_LIKELY(UCS_INPROGRESS == ucs_status)) {
 184                 uct_worker_progress (context->uct_worker);
 185                 mca_btl_uct_context_unlock (context);
 186                 return OPAL_SUCCESS;
 187             }
 188         } else {
 189             /* short message */
 190             if (1 == frag->base.des_segment_count && (frag->uct_iov.length + 8) < MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) {
 191                 ucs_status = uct_ep_am_short (ep_handle, MCA_BTL_UCT_FRAG, frag->header.value, frag->uct_iov.buffer,
 192                                               frag->uct_iov.length);
 193 
 194                 if (OPAL_LIKELY(UCS_OK == ucs_status)) {
 195                     uct_worker_progress (context->uct_worker);
 196                     mca_btl_uct_context_unlock (context);
 197                     /* send is complete */
 198                     mca_btl_uct_frag_complete (frag, OPAL_SUCCESS);
 199                     return 1;
 200                 }
 201             }
 202 
 203             size = uct_ep_am_bcopy (ep_handle, MCA_BTL_UCT_FRAG, mca_btl_uct_send_frag_pack, frag, 0);
 204             if (OPAL_LIKELY(size == msg_size)) {
 205                 uct_worker_progress (context->uct_worker);
 206                 mca_btl_uct_context_unlock (context);
 207                 /* send is complete */
 208                 mca_btl_uct_frag_complete (frag, OPAL_SUCCESS);
 209                 return 1;
 210             }
 211         }
 212 
 213         /* wait for something to happen */
 214         uct_worker_progress (context->uct_worker);
 215         mca_btl_uct_context_unlock (context);
 216 
 217         mca_btl_uct_device_handle_completions (context);
 218     }
 219 
 220     if (!append) {
 221         return OPAL_ERR_OUT_OF_RESOURCE;
 222     }
 223 
 224     OPAL_THREAD_LOCK(&uct_btl->lock);
 225     mca_btl_uct_append_pending_frag (uct_btl, frag, context, true);
 226     OPAL_THREAD_UNLOCK(&uct_btl->lock);
 227 
 228     return OPAL_SUCCESS;
 229 }
 230 
 231 int mca_btl_uct_send (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, mca_btl_base_descriptor_t *descriptor,
 232                       mca_btl_base_tag_t tag)
 233 {
 234     mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
 235     mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_am_context (uct_btl);
 236     mca_btl_uct_base_frag_t *frag = (mca_btl_uct_base_frag_t *) descriptor;
 237     uct_ep_h ep_handle;
 238     int rc;
 239 
 240     BTL_VERBOSE(("btl/uct sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
 241                  OPAL_PROC_MY_NAME.vpid, endpoint->ep_proc->proc_name.vpid, frag->uct_iov.length));
 242 
 243 
 244     frag->header.data.tag = tag;
 245     frag->context = context;
 246 
 247     rc = mca_btl_uct_endpoint_check_am (uct_btl, endpoint, context, &ep_handle);
 248     if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
 249         OPAL_THREAD_LOCK(&uct_btl->lock);
 250         /* check one more time in case another thread is completing the connection now */
 251         if (OPAL_SUCCESS != mca_btl_uct_endpoint_test_am (uct_btl, endpoint, context, &ep_handle)) {
 252             mca_btl_uct_append_pending_frag (uct_btl, frag, context, false);
 253             OPAL_THREAD_UNLOCK(&uct_btl->lock);
 254             return OPAL_SUCCESS;
 255         }
 256         OPAL_THREAD_UNLOCK(&uct_btl->lock);
 257     }
 258 
 259     return mca_btl_uct_send_frag (uct_btl, frag, true);
 260 }
 261 
 262 struct mca_btl_uct_sendi_pack_args_t {
 263     uint64_t am_header;
 264     void *header;
 265     size_t header_size;
 266     opal_convertor_t *convertor;
 267     size_t payload_size;
 268 };
 269 
 270 typedef struct mca_btl_uct_sendi_pack_args_t mca_btl_uct_sendi_pack_args_t;
 271 
 272 static size_t mca_btl_uct_sendi_pack (void *data, void *arg)
 273 {
 274     mca_btl_uct_sendi_pack_args_t *args = (mca_btl_uct_sendi_pack_args_t *) arg;
 275     mca_btl_uct_am_header_t *am_header = (mca_btl_uct_am_header_t *) data;
 276 
 277     am_header->value = args->am_header;
 278     _mca_btl_uct_send_pack ((void *)((intptr_t)data + 8), args->header, args->header_size, args->convertor,
 279                             args->payload_size);
 280     return args->header_size + args->payload_size + 8;
 281 }
 282 
 283 static inline size_t mca_btl_uct_max_sendi (mca_btl_uct_module_t *uct_btl, int context_id)
 284 {
 285     return MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context_id).cap.am.max_bcopy;
 286 }
 287 
 288 int mca_btl_uct_sendi (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, opal_convertor_t *convertor,
 289                        void *header, size_t header_size, size_t payload_size, uint8_t order, uint32_t flags,
 290                        mca_btl_base_tag_t tag, mca_btl_base_descriptor_t **descriptor)
 291 {
 292     mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
 293     mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_am_context (uct_btl);
 294     const size_t total_size = header_size + payload_size;
 295     /* message with header */
 296     const size_t msg_size = total_size + 8;
 297     mca_btl_uct_am_header_t am_header;
 298     ucs_status_t ucs_status = UCS_ERR_NO_RESOURCE;
 299     uct_ep_h ep_handle;
 300     int rc;
 301 
 302     rc = mca_btl_uct_endpoint_check_am (uct_btl, endpoint, context, &ep_handle);
 303     if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || msg_size > mca_btl_uct_max_sendi (uct_btl, context->context_id))) {
 304         if (descriptor) {
 305             *descriptor = mca_btl_uct_alloc (btl, endpoint, order, total_size, flags);
 306         }
 307 
 308         return OPAL_ERR_OUT_OF_RESOURCE;
 309     }
 310 
 311     am_header.data.tag = tag;
 312 
 313     mca_btl_uct_context_lock (context);
 314     if (0 == payload_size) {
 315         ucs_status = uct_ep_am_short (ep_handle, MCA_BTL_UCT_FRAG, am_header.value, header, header_size);
 316     } else if (msg_size < (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context->context_id).cap.am.max_short) {
 317         int8_t *data = alloca (total_size);
 318         _mca_btl_uct_send_pack (data, header, header_size, convertor, payload_size);
 319         ucs_status = uct_ep_am_short (ep_handle, MCA_BTL_UCT_FRAG, am_header.value, data, total_size);
 320     } else {
 321         ssize_t size;
 322 
 323         size = uct_ep_am_bcopy (ep_handle, MCA_BTL_UCT_FRAG, mca_btl_uct_sendi_pack,
 324                                 &(mca_btl_uct_sendi_pack_args_t) {.am_header = am_header.value,
 325                                         .header = header, .header_size = header_size,
 326                                         .convertor = convertor, .payload_size = payload_size}, 0);
 327         if (OPAL_LIKELY(size == (ssize_t) msg_size)) {
 328             ucs_status = UCS_OK;
 329         }
 330     }
 331 
 332     mca_btl_uct_context_unlock (context);
 333 
 334     if (OPAL_UNLIKELY(UCS_OK != ucs_status)) {
 335         if (descriptor) {
 336             *descriptor = mca_btl_uct_alloc (btl, endpoint, order, total_size, flags);
 337         }
 338 
 339         return OPAL_ERR_OUT_OF_RESOURCE;
 340     }
 341 
 342     return OPAL_SUCCESS;
 343 }

/* [<][>][^][v][top][bottom][index][help] */