root/opal/mca/btl/usnic/btl_usnic_ack.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. opal_btl_usnic_fast_retrans
  2. opal_btl_usnic_handle_ack
  3. opal_btl_usnic_ack_send
  4. opal_btl_usnic_ack_complete
  5. opal_btl_usnic_ack_timeout

   1 /*
   2  * Copyright (c) 2013-2017 Cisco Systems, Inc.  All rights reserved.
   3  * $COPYRIGHT$
   4  *
   5  * Additional copyrights may follow
   6  *
   7  * $HEADER$
   8  */
   9 
  10 #include "opal_config.h"
  11 
  12 #include <errno.h>
  13 #include <string.h>
  14 #include <unistd.h>
  15 
  16 #include "opal/util/output.h"
  17 #include "opal/class/opal_hotel.h"
  18 
  19 #include "btl_usnic.h"
  20 #include "btl_usnic_frag.h"
  21 #include "btl_usnic_endpoint.h"
  22 #include "btl_usnic_module.h"
  23 #include "btl_usnic_ack.h"
  24 #include "btl_usnic_util.h"
  25 #include "btl_usnic_send.h"
  26 #include "btl_usnic_connectivity.h"
  27 
  28 /*
  29  * Special case: we know exactly which segment is missing at the
  30  * receive; explicitly force retrans of that segment.
  31  */
  32 static void
  33 opal_btl_usnic_fast_retrans(
  34     opal_btl_usnic_endpoint_t *endpoint,
  35     opal_btl_usnic_seq_t ack_seq)
  36 {
  37     opal_btl_usnic_send_segment_t *sseg;
  38     int is;
  39 
  40     is = WINDOW_SIZE_MOD(ack_seq + 1);
  41     sseg = endpoint->endpoint_sent_segs[is];
  42 
  43     // If the sseg is NULL, then there's nothing to retransmit.  If
  44     // the hotel room is -1, the segment has already been queued up
  45     // for retransmit and there's nothing additional we need to do
  46     // here.
  47     if (sseg == NULL || sseg->ss_hotel_room == -1) {
  48         return;
  49     }
  50 
  51     /* cancel retrans timer */
  52     opal_hotel_checkout(&endpoint->endpoint_hotel, sseg->ss_hotel_room);
  53     sseg->ss_hotel_room = -1;
  54 
  55     /* Queue up this segment to be resent */
  56     opal_list_append(&(endpoint->endpoint_module->pending_resend_segs),
  57                      &(sseg->ss_base.us_list.super));
  58 
  59     ++endpoint->endpoint_module->stats.num_fast_retrans;
  60 }
  61 
  62 
  63 /*
  64  * We have received an ACK for a given sequence number (either standalone
  65  * or via piggy-back on a regular send)
  66  */
  67 void
  68 opal_btl_usnic_handle_ack(
  69     opal_btl_usnic_endpoint_t *endpoint,
  70     opal_btl_usnic_seq_t ack_seq)
  71 {
  72     opal_btl_usnic_seq_t is;
  73     opal_btl_usnic_send_segment_t *sseg;
  74     opal_btl_usnic_send_frag_t *frag;
  75     opal_btl_usnic_module_t *module;
  76     uint32_t bytes_acked;
  77 
  78     module = endpoint->endpoint_module;
  79 
  80     /* ignore if this is an old ACK */
  81     if (SEQ_LT(ack_seq, endpoint->endpoint_ack_seq_rcvd)) {
  82 #if MSGDEBUG1
  83         opal_output(0, "Got OLD DUP ACK seq %"UDSEQ" < %"UDSEQ"\n",
  84                 ack_seq, endpoint->endpoint_ack_seq_rcvd);
  85 #endif
  86         ++module->stats.num_old_dup_acks;
  87         return;
  88     }
  89 
  90     /* A duplicate ACK means the sender did not receive the next
  91        seg that we sent */
  92     else if (ack_seq == endpoint->endpoint_ack_seq_rcvd) {
  93         ++module->stats.num_dup_acks;
  94 
  95         opal_btl_usnic_fast_retrans(endpoint, ack_seq);
  96         return;
  97     }
  98 
  99     /* Does this ACK have a new sequence number that we haven't
 100        seen before? */
 101     for (is = endpoint->endpoint_ack_seq_rcvd + 1; SEQ_LE(is, ack_seq); ++is) {
 102         sseg = endpoint->endpoint_sent_segs[WINDOW_SIZE_MOD(is)];
 103 
 104 #if MSGDEBUG1
 105         opal_output(0, "  Checking ACK/sent_segs window %p, index %lu, seq %lu, occupied=%p, seg_room=%d",
 106             (void*) endpoint->endpoint_sent_segs,
 107             WINDOW_SIZE_MOD(is), is, (void*)sseg, (sseg?sseg->ss_hotel_room:-2));
 108 #endif
 109 
 110         assert(sseg != NULL);
 111         assert(sseg->ss_base.us_btl_header->pkt_seq == is);
 112 #if MSGDEBUG1
 113         if (sseg->ss_hotel_room == -1) {
 114             opal_output(0, "=== ACKed frag in sent_frags array is not in hotel/enqueued, module %p, endpoint %p, seg %p, seq %" UDSEQ ", slot %lu",
 115                         (void*) module, (void*) endpoint,
 116                         (void*) sseg, is, WINDOW_SIZE_MOD(is));
 117         }
 118 #endif
 119 
 120         /* Check the sending segment out from the hotel.  NOTE: The
 121            segment might not actually be in a hotel room if it has
 122            already been evicted and queued for resend.
 123            If it's not in the hotel, don't check it out! */
 124         if (OPAL_LIKELY(sseg->ss_hotel_room != -1)) {
 125             opal_hotel_checkout(&endpoint->endpoint_hotel, sseg->ss_hotel_room);
 126             sseg->ss_hotel_room = -1;
 127         }
 128         /* hotel_room == -1 means queued for resend, remove it */
 129         else {
 130             opal_list_remove_item((&module->pending_resend_segs),
 131                     &sseg->ss_base.us_list.super);
 132         }
 133 
 134         /* update the owning fragment */
 135         bytes_acked = sseg->ss_base.us_btl_header->payload_len;
 136         frag = sseg->ss_parent_frag;
 137 
 138 #if MSGDEBUG1
 139         opal_output(0, "   ACKED seg %p frag %p ack_bytes=%"PRIu32" left=%zd dst_seg[0].seg_addr=%p des_flags=0x%x\n",
 140                 (void*)sseg, (void*)frag, bytes_acked,
 141                 frag->sf_ack_bytes_left - bytes_acked,
 142                 frag->sf_base.uf_local_seg[0].seg_addr.pval,
 143                 frag->sf_base.uf_base.des_flags);
 144 #endif
 145 
 146         /* If all ACKs received, and this is a put or a regular send
 147          * that needs a callback, perform the callback now
 148          *
 149          * NOTE on sf_ack_bytes_left - here we check for
 150          *      sf_ack_bytes_left == bytes_acked
 151          * as opposed to adjusting sf_ack_bytes_left and checking for 0 because
 152          * if we don't, the callback function may call usnic_free() and free
 153          * the fragment out from under us which we do not want.  If the
 154          * fragment really needs to be freed, we'll take care of it in a few
 155          * lines below.
 156          */
 157         if (frag->sf_ack_bytes_left == bytes_acked) {
 158 #if BTL_VERSION == 30
 159             if (frag->sf_base.uf_remote_seg[0].seg_addr.pval != NULL) {
 160                 OPAL_BTL_USNIC_DO_PUT_FRAG_CB(module, frag, "put completion");
 161             } else if (frag->sf_base.uf_base.des_flags &
 162                        MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
 163                 OPAL_BTL_USNIC_DO_SEND_FRAG_CB(module, frag, "send completion");
 164             }
 165 #else
 166             if ((frag->sf_base.uf_remote_seg[0].seg_addr.pval != NULL) ||
 167                 (frag->sf_base.uf_base.des_flags &
 168                  MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
 169                 OPAL_BTL_USNIC_DO_SEND_FRAG_CB(module, frag, "send completion");
 170             }
 171 #endif
 172         }
 173 
 174         /* free this segment */
 175         sseg->ss_ack_pending = false;
 176         if (sseg->ss_send_posted == 0) {
 177             opal_btl_usnic_release_send_segment(module, frag, sseg);
 178         }
 179 
 180         /* when no bytes left to ACK, fragment send is truly done */
 181         /* see note above on why this is done here as opposed to earlier */
 182         frag->sf_ack_bytes_left -= bytes_acked;
 183 
 184         /* OK to return this fragment? */
 185         opal_btl_usnic_send_frag_return_cond(module, frag);
 186 
 187         /* indicate this segment has been ACKed */
 188         endpoint->endpoint_sent_segs[WINDOW_SIZE_MOD(is)] = NULL;
 189     }
 190 
 191     /* update ACK received */
 192     endpoint->endpoint_ack_seq_rcvd = ack_seq;
 193 
 194     /* send window may have opened, possibly make endpoint ready-to-send */
 195     opal_btl_usnic_check_rts(endpoint);
 196 }
 197 
 198 /*
 199  * Send an ACK
 200  */
 201 int
 202 opal_btl_usnic_ack_send(
 203     opal_btl_usnic_module_t *module,
 204     opal_btl_usnic_endpoint_t *endpoint)
 205 {
 206     opal_btl_usnic_ack_segment_t *ack;
 207 
 208     /* If we don't have any send credits in the priority channel,
 209        don't send it */
 210     if (module->mod_channels[USNIC_PRIORITY_CHANNEL].credits < 1) {
 211         return OPAL_ERR_OUT_OF_RESOURCE;
 212     }
 213 
 214     /* Get an ACK frag.  If we don't get one, just discard this ACK. */
 215     ack = opal_btl_usnic_ack_segment_alloc(module);
 216     if (OPAL_UNLIKELY(NULL == ack)) {
 217         return OPAL_ERR_OUT_OF_RESOURCE;
 218     }
 219 
 220     --module->mod_channels[USNIC_PRIORITY_CHANNEL].credits;
 221 
 222     /* send the seq of the lowest item in the window that
 223        we've received */
 224     ack->ss_base.us_btl_header->ack_seq =
 225         SEQ_DIFF(endpoint->endpoint_next_contig_seq_to_recv, 1);
 226     ack->ss_len = sizeof(opal_btl_usnic_btl_header_t);
 227 
 228 #if MSGDEBUG1
 229     {
 230         char remote_ip[IPV4STRADDRLEN];
 231         struct opal_btl_usnic_modex_t *modex =
 232             &endpoint->endpoint_remote_modex;
 233         opal_btl_usnic_snprintf_ipv4_addr(remote_ip, sizeof(remote_ip),
 234                                           modex->ipv4_addr,
 235                                           modex->netmask);
 236 
 237 
 238         opal_output(0, "--> Sending ACK, length %d, seq %" UDSEQ " to %s, port %u",
 239                     ack->ss_len,
 240                     ack->ss_base.us_btl_header->ack_seq,
 241                     remote_ip,
 242                     modex->ports[ack->ss_channel]);
 243     }
 244 #endif
 245 
 246     /* Do we need to check the connectivity?  If enabled, we'll check
 247        the connectivity at either first send to peer X or first ACK to
 248        peer X. */
 249     opal_btl_usnic_check_connectivity(module, endpoint);
 250 
 251     /* send the ACK */
 252     opal_btl_usnic_post_ack(module, endpoint, ack);
 253 
 254     /* Stats */
 255     ++module->stats.num_ack_sends;
 256 
 257     return OPAL_SUCCESS;
 258 }
 259 
 260 /*
 261  * Sending an ACK has completed, return the segment to the free list
 262  */
 263 void
 264 opal_btl_usnic_ack_complete(opal_btl_usnic_module_t *module,
 265                                    opal_btl_usnic_ack_segment_t *ack)
 266 {
 267     ++module->mod_channels[USNIC_PRIORITY_CHANNEL].credits;
 268     opal_btl_usnic_ack_segment_return(module, ack);
 269     ++module->mod_channels[ack->ss_channel].credits;
 270 }
 271 
 272 /*****************************************************************************/
 273 
 274 /*
 275  * Callback for when a send times out without receiving a
 276  * corresponding ACK.
 277  */
 278 void
 279 opal_btl_usnic_ack_timeout(
 280     opal_hotel_t *hotel,
 281     int room_num,
 282     void *occupant)
 283 {
 284     opal_btl_usnic_send_segment_t *seg;
 285     opal_btl_usnic_endpoint_t *endpoint;
 286     opal_btl_usnic_module_t *module;
 287 
 288     seg = (opal_btl_usnic_send_segment_t*) occupant;
 289     endpoint = seg->ss_parent_frag->sf_endpoint;
 290     module = endpoint->endpoint_module;
 291 
 292 #if MSGDEBUG1
 293     {
 294         opal_output(0, "Send timeout!  seg %p, room %d, seq %" UDSEQ "\n",
 295                     (void*)seg, seg->ss_hotel_room,
 296                     seg->ss_base.us_btl_header->pkt_seq);
 297     }
 298 #endif
 299 
 300     /* timeout checks us out, note this */
 301     seg->ss_hotel_room = -1;
 302 
 303     /* Queue up this frag to be resent */
 304     opal_list_append(&(module->pending_resend_segs),
 305                      &(seg->ss_base.us_list.super));
 306 
 307     /* Stats */
 308     ++module->stats.num_timeout_retrans;
 309 }

/* [<][>][^][v][top][bottom][index][help] */