root/opal/mca/btl/usnic/btl_usnic_module.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. opal_btl_usnic_get_first_endpoint_needing_ack
  2. opal_btl_usnic_get_next_endpoint_needing_ack
  3. opal_btl_usnic_remove_from_endpoints_needing_ack
  4. opal_btl_usnic_add_to_endpoints_needing_ack

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2011 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2006      Sandia National Laboratories. All rights
  14  *                         reserved.
  15  * Copyright (c) 2011-2017 Cisco Systems, Inc.  All rights reserved.
  16  * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
  17  *                         reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  */
  24 /**
  25  * @file
  26  */
  27 #ifndef OPAL_BTL_USNIC_MODULE_H
  28 #define OPAL_BTL_USNIC_MODULE_H
  29 
  30 #include <rdma/fabric.h>
  31 #include <rdma/fi_cm.h>
  32 #include <rdma/fi_eq.h>
  33 #include <rdma/fi_endpoint.h>
  34 #include <rdma/fi_errno.h>
  35 #include <rdma/fi_ext_usnic.h>
  36 
  37 #include "opal/class/opal_pointer_array.h"
  38 
  39 #include "btl_usnic_endpoint.h"
  40 #include "btl_usnic_stats.h"
  41 #include "btl_usnic_util.h"
  42 
  43 /*
  44  * Default limits.
  45  *
  46  * These values were obtained from empirical testing on Intel E5-2690
  47  * machines with Sereno/Lexington cards through an N3546 switch.
  48  */
  49 #define USNIC_DFLT_EAGER_LIMIT_1DEVICE (150 * 1024)
  50 #define USNIC_DFLT_EAGER_LIMIT_NDEVICES (25 * 1024)
  51 #define USNIC_DFLT_RNDV_EAGER_LIMIT 500
  52 #define USNIC_DFLT_PACK_LAZY_THRESHOLD (16 * 1024)
  53 
  54 BEGIN_C_DECLS
  55 
  56 /*
  57  * Forward declarations (incomplete struct tags) to avoid include loops
  58  */
  59 struct opal_btl_usnic_send_segment_t;
  60 struct opal_btl_usnic_recv_segment_t;
  61 
  62 /*
  63  * Abstraction of a set of endpoints
  64  */
  65 typedef struct opal_btl_usnic_channel_t {
  66     int chan_index;
  67 
  68     struct fid_cq *cq;          /* libfabric completion queue handle */
  69 
  70     int chan_max_msg_size;
  71     int chan_rd_num;            /* presumably recv queue depth -- confirm */
  72     int chan_sd_num;            /* presumably send queue depth -- confirm */
  73 
  74     int credits;
  75     uint32_t rx_post_cnt;
  76 
  77     /* fastsend enabled if num_credits_available >= fastsend_wqe_thresh */
  78     unsigned fastsend_wqe_thresh;
  79 
  80     /** pointer to receive segment whose bookkeeping has been deferred */
  81     struct opal_btl_usnic_recv_segment_t *chan_deferred_recv;
  82 
  83     /** queue pair (libfabric endpoint) and its fi_info attributes */
  84     struct fi_info *info;
  85     struct fid_ep *ep;
  86 
  87     struct opal_btl_usnic_recv_segment_t *repost_recv_head;
  88 
  89     /** receive segments & buffers */
  90     opal_free_list_t recv_segs;
  91 
  92     bool chan_error;    /* set when an error is detected on the channel */
  93 
  94     /* statistics */
  95     uint32_t num_channel_sends;
  96 } opal_btl_usnic_channel_t;
  97 
  98 /**
  99  * usnic BTL module
 100  */
 101 typedef struct opal_btl_usnic_module_t {
 102     mca_btl_base_module_t super;    /* base BTL module (first member) */
 103 
 104     /* Cache for use during component_init to associate a module with
 105        the libfabric device that it came from. */
 106     uint32_t libfabric_api;
 107     struct fid_fabric *fabric;
 108     struct fid_domain *domain;
 109     char *linux_device_name;
 110     struct fi_info *fabric_info;
 111     struct fi_usnic_ops_fabric *usnic_fabric_ops;
 112     struct fi_usnic_ops_av *usnic_av_ops;
 113     struct fi_usnic_info usnic_info;
 114     struct fid_eq *dom_eq;
 115     struct fid_eq *av_eq;
 116     struct fid_av *av;
 117 
 118     size_t av_eq_size;
 119 
 120     mca_btl_base_module_error_cb_fn_t pml_error_callback;
 121 
 122     /* Information about the events */
 123     struct event device_async_event;
 124     bool device_async_event_active;
 125     int numa_distance; /* hwloc NUMA distance from this process */
 126 
 127     /** local address information */
 128     struct opal_btl_usnic_modex_t local_modex;
 129     char if_ipv4_addr_str[IPV4STRADDRLEN];
 130 
 131     /** desired send, receive, and completion queue entries (from MCA
 132         params; cached here in the module because the MCA param
 133         might == 0, which means "max supported on that device") */
 134     int sd_num;
 135     int rd_num;
 136     int cq_num;
 137     int av_eq_num;
 138     int prio_sd_num;
 139     int prio_rd_num;
 140     int prio_cq_num;
 141 
 142     /*
 143      * Fragments larger than max_frag_payload will be broken up into
 144      * multiple chunks.  The amount that can be held in a single chunk
 145      * segment is slightly less than what can be held in a frag segment
 146      * due to fragment reassembly info.
 147      */
 148     size_t max_tiny_msg_size;
 149     size_t max_frag_payload;    /* most that fits in a frag segment */
 150     size_t max_chunk_payload;   /* most that can fit in chunk segment */
 151     size_t max_tiny_payload;    /* threshold for using inline send */
 152 
 153     /** Hash table to keep track of senders */
 154     opal_hash_table_t senders;
 155 
 156     /** list of all endpoints.  Note that the main application thread
 157         reads and writes to this list, and the connectivity agent
 158         reads from it.  So all access to the list (but not the items
 159         in the list) must be protected by a lock.  Also, have a flag
 160         that indicates that the list has been constructed.  Probably
 161         overkill, but you can't be too safe with multi-threaded
 162         programming in non-performance-critical code paths... */
 163     opal_list_t all_endpoints;
 164     opal_mutex_t all_endpoints_lock;
 165     bool all_endpoints_constructed;
 166 
 167     /** array of procs used by this module (can't use a list because a
 168         proc can be used by multiple modules) */
 169     opal_pointer_array_t all_procs;
 170 
 171     /** send fragments & buffers */
 172     opal_free_list_t small_send_frags;
 173     opal_free_list_t large_send_frags;
 174     opal_free_list_t put_dest_frags;
 175     opal_free_list_t chunk_segs;
 176 
 177     /** receive buffer pools */
 178     int first_pool;
 179     int last_pool;
 180     opal_free_list_t *module_recv_buffers;
 181 
 182     /** list of endpoints with data to send */
 183     /* this list uses base endpoint ptr */
 184     opal_list_t endpoints_with_sends;
 185 
 186     /** list of send frags that are waiting to be resent (they were
 187         previously deferred because of lack of resources) */
 188     opal_list_t pending_resend_segs;
 189 
 190     /** ack segments */
 191     opal_free_list_t ack_segs;
 192 
 193     /** list of endpoints to which we need to send ACKs */
 194     /* this list uses endpoint->endpoint_ack_li */
 195     opal_list_t endpoints_that_need_acks;
 196 
 197     /* abstract queue-pairs into channels */
 198     opal_btl_usnic_channel_t mod_channels[USNIC_NUM_CHANNELS];
 199 
 200     /* Number of short/erroneous packets we've received on this
 201        interface */
 202     uint32_t num_short_packets;
 203 
 204     /* Performance / debugging statistics */
 205     opal_btl_usnic_module_stats_t stats;
 206 
 207     /** registration cache module (v2.1+) */
 208     mca_rcache_base_module_t *rcache;
 209 } opal_btl_usnic_module_t;
 210 
 211 struct opal_btl_usnic_frag_t;   /* forward declaration only */
 212 extern opal_btl_usnic_module_t opal_btl_usnic_module_template;
 213 
 214 /*
 215  * Manipulate the "endpoints_that_need_acks" list
 216  */
 217 
 218 /* Return the head of module's ACK-pending endpoint list, NULL if empty */
 219 static inline opal_btl_usnic_endpoint_t *
 220 opal_btl_usnic_get_first_endpoint_needing_ack(
 221     opal_btl_usnic_module_t *module)
 222 {
 223     opal_list_t *acks = &module->endpoints_that_need_acks;
 224     opal_list_item_t *head = opal_list_get_first(acks);
 225 
 226     /* first item == end marker: no endpoint is queued, nothing to return */
 227     if (head == opal_list_get_end(acks)) {
 228         return NULL;
 229     }
 230 
 231     /* list items are the endpoint_ack_li member; map back to the endpoint */
 232     return container_of(head, mca_btl_base_endpoint_t, endpoint_ack_li);
 233 }
 234 
 235 /* Return the endpoint queued after this one for an ACK, NULL at list end */
 236 static inline opal_btl_usnic_endpoint_t *
 237 opal_btl_usnic_get_next_endpoint_needing_ack(
 238     opal_btl_usnic_endpoint_t *endpoint)
 239 {
 240     /* the list is the one held by this endpoint's module */
 241     opal_list_t *acks = &endpoint->endpoint_module->endpoints_that_need_acks;
 242     opal_list_item_t *successor;
 243 
 244     successor = opal_list_get_next(&endpoint->endpoint_ack_li);
 245     if (successor == opal_list_get_end(acks)) {
 246         /* reached the end marker: no further endpoints are queued */
 247         return NULL;
 248     }
 249 
 250     /* map the embedded list item back to its enclosing endpoint */
 251     return container_of(successor, mca_btl_base_endpoint_t, endpoint_ack_li);
 252 }
 253 
 254 static inline void
 255 opal_btl_usnic_remove_from_endpoints_needing_ack(
 256     opal_btl_usnic_endpoint_t *endpoint)
 257 {
 258     /* unlink from the module's ACK list, then clear the ACK bookkeeping */
 259     opal_list_t *acks = &endpoint->endpoint_module->endpoints_that_need_acks;
 260     opal_list_remove_item(acks, &endpoint->endpoint_ack_li);
 261     endpoint->endpoint_ack_needed = false;
 262     endpoint->endpoint_acktime = 0;
 263 #if MSGDEBUG1
 264     opal_output(0, "clear ack_needed on %p\n", (void*)endpoint);
 265 #endif
 266 }
 267 
 268 static inline void
 269 opal_btl_usnic_add_to_endpoints_needing_ack(
 270     opal_btl_usnic_endpoint_t *endpoint)
 271 {
 272     opal_list_t *acks = &endpoint->endpoint_module->endpoints_that_need_acks;
 273     opal_list_append(acks, &endpoint->endpoint_ack_li);  /* queue at tail */
 274     endpoint->endpoint_ack_needed = true;
 275 #if MSGDEBUG1
 276     opal_output(0, "set ack_needed on %p\n", (void*)endpoint);
 277 #endif
 278 }
 279 
 280 /*
 281  * Initialize a module.  NOTE(review): the int result is presumably an
 282  * OPAL status code -- confirm against the definition. */
 283 int opal_btl_usnic_module_init(opal_btl_usnic_module_t* module);
 284 
 285 
 286 /*
 287  * Progress pending sends on a module (declared here; defined elsewhere)
 288  */
 289 void opal_btl_usnic_module_progress_sends(opal_btl_usnic_module_t *module);
 290 
 291 /* Emit (via opal_output) statistics that are useful for debugging */
 292 void opal_btl_usnic_print_stats(
 293     opal_btl_usnic_module_t *module,
 294     const char *prefix,     /* presumably prepended to the output -- confirm */
 295     bool reset_stats);      /* presumably clears counters -- confirm */
 296 
 297 END_C_DECLS
 298 #endif

/* [<][>][^][v][top][bottom][index][help] */