root/opal/mca/btl/vader/btl_vader_fbox.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. mca_btl_vader_fbox_set_header
  2. mca_btl_vader_fbox_read_header
  3. mca_btl_vader_fbox_sendi
  4. mca_btl_vader_check_fboxes
  5. mca_btl_vader_try_fbox_setup

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
   4  *                         reserved.
   5  * Copyright (c) 2018      Triad National Security, LLC. All rights
   6  *                         reserved.
   7  * $COPYRIGHT$
   8  *
   9  * Additional copyrights may follow
  10  *
  11  * $HEADER$
  12  */
  13 
  14 #if !defined(MCA_BTL_VADER_FBOX_H)
  15 #define MCA_BTL_VADER_FBOX_H
  16 
  17 #include "btl_vader.h"
  18 
  19 #define MCA_BTL_VADER_POLL_COUNT 31
  20 
/** Fast-box fragment header.
 *
 * The union lets the 64-bit header be accessed either through its logical
 * fields (data) or as two aligned 32-bit words (data_i32) so the sender and
 * receiver can control the order in which each half becomes visible in
 * shared memory. */
typedef union mca_btl_vader_fbox_hdr_t {
    struct {
        /* NTH: on 32-bit platforms loading/unloading the header may be completed
         * in multiple instructions. To ensure that seq is never loaded before tag
         * and the tag is never read before seq put them in the same 32-bits of the
         * header. */
        /** message size */
        uint32_t  size;
        /** message tag */
        uint16_t  tag;
        /** sequence number */
        uint16_t  seq;
    } data;
    /* 32-bit word view: value0 overlays data.size, value1 overlays tag+seq */
    struct {
        uint32_t value0;
        uint32_t value1;
    } data_i32;
    /** whole-header view (used to clear the header with a single store) */
    uint64_t ival;
} mca_btl_vader_fbox_hdr_t;

/** cast a raw buffer pointer to a fast-box header pointer */
#define MCA_BTL_VADER_FBOX_HDR(x) ((mca_btl_vader_fbox_hdr_t *) (x))
  42 
  43 #define MCA_BTL_VADER_FBOX_OFFSET_MASK 0x7fffffff
  44 #define MCA_BTL_VADER_FBOX_HB_MASK     0x80000000
  45 
  46 /* if the two offsets are equal and the high bit matches the buffer is empty else the buffer is full.
  47  * note that start will never be end - 1 so this simplified conditional will always produce the correct
  48  * result */
  49 #define BUFFER_FREE(s,e,hbm,size) (((s + !hbm) > (e)) ? (s) - (e) : (size - (e)))
  50 
  51 /** macro for checking if the high bit is set */
  52 #define MCA_BTL_VADER_FBOX_OFFSET_HBS(v) (!!((v) & MCA_BTL_VADER_FBOX_HB_MASK))
  53 
  54 void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, mca_btl_base_endpoint_t *ep);
  55 
/**
 * Publish a fragment header into a fast box.
 *
 * Sender side of the fbox protocol: the tag/seq word (value1) is cleared
 * first, then the size word (value0) is stored, and only then is the
 * tag/seq word written.  The receiver (mca_btl_vader_fbox_read_header)
 * loads value1 before value0, so once it observes a non-zero tag the size
 * is guaranteed to be valid.  The write barriers keep these stores from
 * being reordered.
 *
 * @param hdr   fast-box header to fill in (lives in shared memory)
 * @param tag   message tag (a non-zero tag marks the fragment as ready)
 * @param seq   sequence number the receiver expects next
 * @param size  message size in bytes
 */
static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr, uint16_t tag,
                                                  uint16_t seq, uint32_t size)
{
    mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = tag, .seq = seq, .size = size}};
    /* clear out existing tag/seq */
    hdr->data_i32.value1 = 0;
    opal_atomic_wmb ();
    /* value0 overlays data.size in the union */
    hdr->data_i32.value0 = size;
    opal_atomic_wmb ();
    hdr->data_i32.value1 = tmp.data_i32.value1;
}
  67 
  68 static inline mca_btl_vader_fbox_hdr_t mca_btl_vader_fbox_read_header (mca_btl_vader_fbox_hdr_t *hdr)
  69 {
  70     mca_btl_vader_fbox_hdr_t tmp = {.data_i32 = {.value1 = hdr->data_i32.value1}};;
  71     opal_atomic_rmb ();
  72     tmp.data_i32.value0 = hdr->data_i32.value0;
  73     return tmp;
  74 }
  75 
  76 /* attempt to reserve a contiguous segment from the remote ep */
/* attempt to reserve a contiguous segment from the remote ep */
/**
 * Try to deliver a small message through the peer's fast box — a per-peer
 * shared-memory ring buffer that this process writes (fbox_out) and the
 * peer drains.
 *
 * @param ep           endpoint of the destination peer
 * @param tag          active-message tag for the fragment
 * @param header       fragment header bytes (always copied)
 * @param header_size  size of the header in bytes
 * @param payload      optional payload appended after the header (may be NULL)
 * @param payload_size size of the payload in bytes
 *
 * @returns true if the fragment was written into the fast box, false if no
 *          fast box is set up for this peer or there is insufficient free
 *          space (the caller is expected to fall back to another send path)
 */
static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t *ep, unsigned char tag,
                                             void * restrict header, const size_t header_size,
                                             void * restrict payload, const size_t payload_size)
{
    const unsigned int fbox_size = mca_btl_vader_component.fbox_size;
    size_t size = header_size + payload_size;
    unsigned int start, end, buffer_free;
    /* user-visible fragment size; written into the fbox header (while the
     * local 'size' below is rounded up for buffer accounting) */
    size_t data_size = size;
    unsigned char *dst, *data;
    bool hbs, hbm;

    /* don't try to use the per-peer buffer for messages that will fill up more than 25% of the buffer */
    if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer || size > (fbox_size >> 2))) {
        return false;
    }

    OPAL_THREAD_LOCK(&ep->lock);

    /* the high bit helps determine if the buffer is empty or full */
    hbs = MCA_BTL_VADER_FBOX_OFFSET_HBS(ep->fbox_out.end);
    hbm = MCA_BTL_VADER_FBOX_OFFSET_HBS(ep->fbox_out.start) == hbs;

    /* read current start and end offsets and check for free space */
    start = ep->fbox_out.start & MCA_BTL_VADER_FBOX_OFFSET_MASK;
    end = ep->fbox_out.end & MCA_BTL_VADER_FBOX_OFFSET_MASK;
    buffer_free = BUFFER_FREE(start, end, hbm, fbox_size);

    /* need space for the fragment + the header */
    size = (size + sizeof (mca_btl_vader_fbox_hdr_t) + MCA_BTL_VADER_FBOX_ALIGNMENT_MASK) & ~MCA_BTL_VADER_FBOX_ALIGNMENT_MASK;

    dst = ep->fbox_out.buffer + end;

    if (OPAL_UNLIKELY(buffer_free < size)) {
        /* check if we need to free up space for this fragment */
        BTL_VERBOSE(("not enough room for a fragment of size %u. in use buffer segment: {start: %x, end: %x, high bit matches: %d}",
                     (unsigned) size, start, end, (int) hbm));

        /* read the current start pointer from the remote peer and recalculate the available buffer space */
        start = ep->fbox_out.start = ep->fbox_out.startp[0];

        /* recalculate how much buffer space is available */
        start &= MCA_BTL_VADER_FBOX_OFFSET_MASK;
        hbm = MCA_BTL_VADER_FBOX_OFFSET_HBS(ep->fbox_out.start) == hbs;
        buffer_free = BUFFER_FREE(start, end, hbm, fbox_size);

        opal_atomic_rmb ();

        /* if this is the end of the buffer and the fragment doesn't fit then mark the remaining buffer space to
         * be skipped and check if the fragment can be written at the beginning of the buffer. */
        if (OPAL_UNLIKELY(buffer_free > 0 && buffer_free < size && start <= end)) {
            BTL_VERBOSE(("message will not fit in remaining buffer space. skipping to beginning"));

            /* 0xff tag: tells the receiver to skip to the start of the buffer
             * (see the tag handling in mca_btl_vader_check_fboxes) */
            mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), 0xff, ep->fbox_out.seq++,
                                           buffer_free - sizeof (mca_btl_vader_fbox_hdr_t));

            end = MCA_BTL_VADER_FBOX_ALIGNMENT;
            /* toggle the high bit */
            hbs = !hbs;
            /* toggle the high bit match */
            buffer_free = BUFFER_FREE(start, end, !hbm, fbox_size);
            dst = ep->fbox_out.buffer + end;
        }

        if (OPAL_UNLIKELY(buffer_free < size)) {
            /* still no room: publish any wrap we performed above and give up */
            ep->fbox_out.end = (hbs << 31) | end;
            opal_atomic_wmb ();
            OPAL_THREAD_UNLOCK(&ep->lock);
            return false;
        }
    }

    BTL_VERBOSE(("writing fragment of size %u to offset %u {start: 0x%x, end: 0x%x (hbs: %d)} of peer's buffer. free = %u",
                 (unsigned int) size, end, start, end, hbs, buffer_free));

    /* user data starts immediately after the fbox header */
    data = dst + sizeof (mca_btl_vader_fbox_hdr_t);

    memcpy (data, header, header_size);
    if (payload) {
        /* inline sends are typically just pml headers (due to MCA_BTL_FLAGS_SEND_INPLACE) */
        memcpy (data + header_size, payload, payload_size);
    }

    end += size;

    if (OPAL_UNLIKELY(fbox_size == end)) {
        /* toggle the high bit */
        hbs = !hbs;
        /* reset the end pointer to the beginning of the buffer */
        end = MCA_BTL_VADER_FBOX_ALIGNMENT;
    } else if (buffer_free > size) {
        /* clear the next header so the receiver does not read a stale tag */
        MCA_BTL_VADER_FBOX_HDR(ep->fbox_out.buffer + end)->ival = 0;
    }

    /* write out part of the header now. the tag will be written when the data is available */
    mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), tag, ep->fbox_out.seq++, data_size);

    /* align the buffer */
    ep->fbox_out.end = ((uint32_t) hbs << 31) | end;
    opal_atomic_wmb ();
    OPAL_THREAD_UNLOCK(&ep->lock);

    return true;
}
 180 
 181 static inline bool mca_btl_vader_check_fboxes (void)
 182 {
 183     const unsigned int fbox_size = mca_btl_vader_component.fbox_size;
 184     bool processed = false;
 185 
 186     for (unsigned int i = 0 ; i < mca_btl_vader_component.num_fbox_in_endpoints ; ++i) {
 187         mca_btl_base_endpoint_t *ep = mca_btl_vader_component.fbox_in_endpoints[i];
 188         unsigned int start = ep->fbox_in.start & MCA_BTL_VADER_FBOX_OFFSET_MASK;
 189 
 190         /* save the current high bit state */
 191         bool hbs = MCA_BTL_VADER_FBOX_OFFSET_HBS(ep->fbox_in.start);
 192         int poll_count;
 193 
 194         for (poll_count = 0 ; poll_count <= MCA_BTL_VADER_POLL_COUNT ; ++poll_count) {
 195             const mca_btl_vader_fbox_hdr_t hdr = mca_btl_vader_fbox_read_header (MCA_BTL_VADER_FBOX_HDR(ep->fbox_in.buffer + start));
 196 
 197             /* check for a valid tag a sequence number */
 198             if (0 == hdr.data.tag || hdr.data.seq != ep->fbox_in.seq) {
 199                 break;
 200             }
 201 
 202             ++ep->fbox_in.seq;
 203 
 204             /* force all prior reads to complete before continuing */
 205             opal_atomic_rmb ();
 206 
 207             BTL_VERBOSE(("got frag from %d with header {.tag = %d, .size = %d, .seq = %u} from offset %u",
 208                          ep->peer_smp_rank, hdr.data.tag, hdr.data.size, hdr.data.seq, start));
 209 
 210             /* the 0xff tag indicates we should skip the rest of the buffer */
 211             if (OPAL_LIKELY((0xfe & hdr.data.tag) != 0xfe)) {
 212                 mca_btl_base_segment_t segment;
 213                 mca_btl_base_descriptor_t desc = {.des_segments = &segment, .des_segment_count = 1};
 214                 const mca_btl_active_message_callback_t *reg =
 215                     mca_btl_base_active_message_trigger + hdr.data.tag;
 216 
 217                 /* fragment fits entirely in the remaining buffer space. some
 218                  * btl users do not handle fragmented data so we can't split
 219                  * the fragment without introducing another copy here. this
 220                  * limitation has not appeared to cause any performance
 221                  * degradation. */
 222                 segment.seg_len = hdr.data.size;
 223                 segment.seg_addr.pval = (void *) (ep->fbox_in.buffer + start + sizeof (hdr));
 224 
 225                 /* call the registered callback function */
 226                 reg->cbfunc(&mca_btl_vader.super, hdr.data.tag, &desc, reg->cbdata);
 227             } else if (OPAL_LIKELY(0xfe == hdr.data.tag)) {
 228                 /* process fragment header */
 229                 fifo_value_t *value = (fifo_value_t *)(ep->fbox_in.buffer + start + sizeof (hdr));
 230                 mca_btl_vader_hdr_t *hdr = relative2virtual(*value);
 231                 mca_btl_vader_poll_handle_frag (hdr, ep);
 232             }
 233 
 234             start = (start + hdr.data.size + sizeof (hdr) + MCA_BTL_VADER_FBOX_ALIGNMENT_MASK) & ~MCA_BTL_VADER_FBOX_ALIGNMENT_MASK;
 235             if (OPAL_UNLIKELY(fbox_size == start)) {
 236                 /* jump to the beginning of the buffer */
 237                 start = MCA_BTL_VADER_FBOX_ALIGNMENT;
 238                 /* toggle the high bit */
 239                 hbs = !hbs;
 240             }
 241         }
 242 
 243         if (poll_count) {
 244             BTL_VERBOSE(("left off at offset %u (hbs: %d)", start, hbs));
 245 
 246             /* save where we left off */
 247             /* let the sender know where we stopped */
 248             opal_atomic_mb ();
 249             ep->fbox_in.start = ep->fbox_in.startp[0] = ((uint32_t) hbs << 31) | start;
 250             processed = true;
 251         }
 252     }
 253 
 254     /* return the number of fragments processed */
 255     return processed;
 256 }
 257 
/**
 * Lazily set up a fast box for sending to this peer.
 *
 * Increments ep->send_count and, exactly when it reaches the component's
 * fbox_threshold (and no send-side fast box exists yet), tries to reserve
 * a slot with the remote side and allocate a fast box.  On success the
 * setup is advertised to the peer by setting MCA_BTL_VADER_FLAG_SETUP_FBOX
 * and fbox_base in @hdr (piggy-backed on an outgoing fragment).
 *
 * @param ep   endpoint of the destination peer
 * @param hdr  header of an outgoing fragment used to carry the setup info
 */
static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mca_btl_vader_hdr_t *hdr)
{
    if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_FETCH_SIZE_T (&ep->send_count, 1))) {
        /* protect access to mca_btl_vader_component.segment_offset */
        OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);

        /* verify the remote side will accept another fbox */
        if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) {
            opal_free_list_item_t *fbox = opal_free_list_get (&mca_btl_vader_component.vader_fboxes);

            if (NULL != fbox) {
                /* zero out the fast box */
                memset (fbox->ptr, 0, mca_btl_vader_component.fbox_size);
                mca_btl_vader_endpoint_setup_fbox_send (ep, fbox);

                hdr->flags |= MCA_BTL_VADER_FLAG_SETUP_FBOX;
                hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer);
            } else {
                /* no free fast box: give back the slot reserved above */
                opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1);
            }

            opal_atomic_wmb ();
        }

        OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock);
    }
}
 285 
 286 #endif /* !defined(MCA_BTL_VADER_FBOX_H) */

/* [<][>][^][v][top][bottom][index][help] */