root/opal/mca/btl/usnic/btl_usnic_endpoint.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /*
   2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2006 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2006      Sandia National Laboratories. All rights
  13  *                         reserved.
  14  * Copyright (c) 2013-2017 Cisco Systems, Inc.  All rights reserved.
  15  * $COPYRIGHT$
  16  *
  17  * Additional copyrights may follow
  18  *
  19  * $HEADER$
  20  */
  21 
  22 #ifndef OPAL_BTL_USNIC_ENDPOINT_H
  23 #define OPAL_BTL_USNIC_ENDPOINT_H
  24 
  25 #include <rdma/fabric.h>
  26 
  27 #include "opal/class/opal_list.h"
  28 #include "opal/class/opal_hotel.h"
  29 #include "opal/mca/event/event.h"
  30 
  31 #include "btl_usnic.h"
  32 
  33 BEGIN_C_DECLS
  34 
  35 /*
  36  * Forward declarations to avoid include loops
  37  */
  38 struct opal_btl_usnic_module_t;
  39 struct opal_btl_usnic_send_segment_t;
  40 
  41 /*
  42  * Have the window size as a compile-time constant that is a power of
  43  * two so that we can take advantage of fast bit operations.
  44  */
  45 #define WINDOW_SIZE 4096
  46 #define WINDOW_SIZE_MOD(a) (((a) & (WINDOW_SIZE - 1)))
  47 #define WINDOW_OPEN(E) (SEQ_LT((E)->endpoint_next_seq_to_send, \
  48         ((E)->endpoint_ack_seq_rcvd + WINDOW_SIZE)))
  49 #define WINDOW_EMPTY(E) ((E)->endpoint_ack_seq_rcvd == \
  50         ((E)->endpoint_next_seq_to_send-1))
  51 
  52 /*
  53  * Returns true when an endpoint has nothing left to send
  54  */
  55 #define ENDPOINT_DRAINED(E) (WINDOW_EMPTY(E) && \
  56         opal_list_is_empty(&(E)->endpoint_frag_send_queue))
  57 
  58 /*
  59  * Channel IDs
  60  */
  61 typedef enum opal_btl_usnic_channel_id_t {
  62     USNIC_PRIORITY_CHANNEL,
  63     USNIC_DATA_CHANNEL,
  64     USNIC_NUM_CHANNELS
  65 } opal_btl_usnic_channel_id_t;
  66 
  67 typedef struct opal_btl_usnic_modex_t { /* Address information for one peer; an endpoint keeps the remote side's copy in endpoint_remote_modex */
  68     /* IPv4 address, stored in network order */
  69     uint32_t ipv4_addr;
  70     /* One port per channel (indexed by opal_btl_usnic_channel_id_t), stored in host order */
  71     uint32_t ports[USNIC_NUM_CHANNELS];
  72     /* Netmask, stored in network order */
  73     uint32_t netmask;
  74     /* Stored in host order (NOTE(review): unclear whether this also covers the fields below -- confirm) */
  75     uint32_t connectivity_udp_port;
  76     uint32_t link_speed_mbps;
  77     uint16_t max_msg_size;
  78     opal_btl_usnic_seq_t isn; /* presumably the initial sequence number -- confirm */
  79     uint32_t protocol;
  80 } opal_btl_usnic_modex_t;
  81 
  82 struct opal_btl_usnic_send_segment_t;
  83 struct opal_btl_usnic_proc_t;
  84 
  85 /*
  86  * This is a descriptor for an incoming fragment that is broken
  87  * into chunks.  When the first reference to this frag_id is seen,
  88  * memory is allocated for it.  When the last byte arrives, the assembled
  89  * fragment is passed to the PML.
  90  *
  91  * The endpoint structure has space for WINDOW_SIZE/2 simultaneous fragments.
  92  * This is the largest number of fragments that can possibly be in-flight
  93  * to us from a particular endpoint because each chunked fragment will occupy
  94  * at least two segments, and only WINDOW_SIZE segments can be in flight.
  95  * OK, so there is an extremely pathological case where we could see
  96  * (WINDOW_SIZE/2)+1 "in flight" at once, but just dropping that last one
  97  * and waiting for retransmission is just fine in this hypothetical
  98  * hyper-pathological case, which is what we'll do.
  99  */
 100 #define MAX_ACTIVE_FRAGS (WINDOW_SIZE/2)
 101 typedef struct opal_btl_usnic_rx_frag_info_t {
 102     uint32_t    rfi_frag_id;    /* ID for this fragment */
 103     uint32_t    rfi_frag_size;  /* bytes in this fragment */
 104     uint32_t    rfi_bytes_left; /* bytes remaining to RX in fragment */
 105     bool        rfi_data_in_pool; /* data in data_pool if true, else malloced */
 106     int         rfi_data_pool;  /* if <0, data malloced, else rx buf pool */
 107     char       *rfi_data;       /* pointer to assembly area */
 108     opal_free_list_item_t *rfi_fl_elt; /* free list element from buf pool when the data lives in a pool.
 109                                           NOTE(review): this originally read "when rfi_data_pool is nonzero", which disagrees with the "<0 means malloced" convention on rfi_data_pool -- confirm */
 110 } opal_btl_usnic_rx_frag_info_t;
 111 
 112 /**
 113  * An abstraction that represents a connection to a remote process.
 114  * An instance of mca_btl_base_endpoint_t is associated with each
 115  * (btl_usnic_proc_t, btl_usnic_module_t) tuple and address
 116  * information is exchanged at startup.  The usnic BTL is
 117  * connectionless, so no connection is ever established.
 118  */
 119 typedef struct mca_btl_base_endpoint_t {
 120     opal_list_item_t super; /* base list item; first member of the struct */
 121 
 122     /** BTL module that created this connection */
 123     struct opal_btl_usnic_module_t *endpoint_module;
 124 
 125     /** proc that owns this endpoint */
 126     struct opal_btl_usnic_proc_t *endpoint_proc;
 127     int endpoint_proc_index;    /* index in owning proc's endpoint array */
 128 
 129     /** True when proc has been deleted, but still have sends that need ACKs */
 130     bool endpoint_exiting;
 131 
 132     /** List item for linking into module "all_endpoints" */
 133     opal_list_item_t endpoint_endpoint_li;
 134 
 135     /** List item for linking into "need ack" */
 136     opal_list_item_t endpoint_ack_li;
 137 
 138     /** Remote address information */
 139     opal_btl_usnic_modex_t endpoint_remote_modex;
 140 
 141     /** Remote address handle. Need one for each
 142         channel because each remote channel has different dest port */
 143     fi_addr_t endpoint_remote_addrs[USNIC_NUM_CHANNELS];
 144 
 145     /** Send-related data */
 146     bool endpoint_ready_to_send;
 147     opal_list_t endpoint_frag_send_queue; /* ENDPOINT_DRAINED() tests this list for emptiness */
 148     int32_t endpoint_send_credits;
 149     uint32_t endpoint_next_frag_id;
 150 
 151     /** Receive-related data (presumably MAX_ACTIVE_FRAGS entries allocated elsewhere -- confirm) */
 152     struct opal_btl_usnic_rx_frag_info_t *endpoint_rx_frag_info;
 153 
 154     /** OPAL hotel to track outstanding sends */
 155     opal_hotel_t endpoint_hotel;
 156 
 157     /** Sliding window parameters for this peer */
 158     /* Values for the current proc to send to this endpoint on the
 159        peer proc; WINDOW_OPEN() and WINDOW_EMPTY() compare these two */
 160     opal_btl_usnic_seq_t endpoint_next_seq_to_send; /* n_t */
 161     opal_btl_usnic_seq_t endpoint_ack_seq_rcvd; /* n_a */
 162 
 163     /* Table where sent segments sit while waiting for their ACKs.
 164        When a segment is ACKed, it is removed from this table.
            (Presumably indexed by WINDOW_SIZE_MOD(seq) -- confirm.) */
 165     struct opal_btl_usnic_send_segment_t *endpoint_sent_segs[WINDOW_SIZE];
 166 
 167     /* Values for the current proc to receive from this endpoint on
 168        the peer proc */
 169     bool endpoint_ack_needed;
 170 
 171     /* When we receive a packet that needs an ACK, set this
 172      * to delay the ACK to allow for piggybacking
 173      */
 174     uint64_t endpoint_acktime;
 175 
 176     opal_btl_usnic_seq_t endpoint_next_contig_seq_to_recv; /* n_r */
 177     opal_btl_usnic_seq_t endpoint_highest_seq_rcvd; /* n_s */
 178 
 179     bool endpoint_rcvd_segs[WINDOW_SIZE]; /* one flag per window slot */
 180     uint32_t endpoint_rfstart;
 181 
 182     bool endpoint_connectivity_checked;
 183     bool endpoint_on_all_endpoints; /* presumably true while endpoint_endpoint_li is linked on the module's all_endpoints list -- confirm */
 184 } mca_btl_base_endpoint_t;
 185 
 186 typedef mca_btl_base_endpoint_t opal_btl_usnic_endpoint_t; /* usnic-specific alias for the base endpoint type */
 187 OBJ_CLASS_DECLARATION(opal_btl_usnic_endpoint_t); /* OBJ_CLASS_DECLARATION is defined outside this header */
 188 
 189 /*
 190  * Helper struct for the asynchronous creation of fi_addr array
 191  */
 192 typedef struct {
 193     opal_btl_usnic_endpoint_t *endpoint; /* presumably the endpoint whose endpoint_remote_addrs[] entry is being created -- confirm */
 194     opal_btl_usnic_channel_id_t channel_id; /* channel this context refers to */
 195 } opal_btl_usnic_addr_context_t;
 196 
 197 /*
 198  * Flush all pending sends and resends from an endpoint
 199  */
 200 void
 201 opal_btl_usnic_flush_endpoint(
 202     opal_btl_usnic_endpoint_t *endpoint); /* endpoint to flush; the implementation is not in this header */
 203 
 204 END_C_DECLS
 205 #endif

/* [<][>][^][v][top][bottom][index][help] */