root/opal/mca/btl/usnic/btl_usnic.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes following definitions.
  1. get_nsec

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2011 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2006      Sandia National Laboratories. All rights
  14  *                         reserved.
  15  * Copyright (c) 2011-2019 Cisco Systems, Inc.  All rights reserved
  16  * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
  17  *                         reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  */
  24 /**
  25  * @file
  26  */
  27 #ifndef OPAL_BTL_USNIC_H
  28 #define OPAL_BTL_USNIC_H
  29 
  30 #include "opal_config.h"
  31 #include <sys/types.h>
  32 
  33 #include "opal_stdint.h"
  34 #include "opal/util/alfg.h"
  35 #include "opal/class/opal_hash_table.h"
  36 #include "opal/class/opal_hash_table.h"
  37 #include "opal/mca/event/event.h"
  38 
  39 #include "opal/mca/btl/btl.h"
  40 #include "opal/mca/btl/base/btl_base_error.h"
  41 #include "opal/mca/btl/base/base.h"
  42 #include "opal/mca/rcache/rcache.h"
  43 
  44 #include "btl_usnic_compat.h"
  45 
  46 BEGIN_C_DECLS
  47 
  48 /*
  49  * We're simulating a clock as best we can without resorting to the
  50  * system.  The clock is used to defer ACKs, and ticks will be incremented
  51  * when progression gets called.  It could be incremented by different amounts
  52  * at other times as needed or as tuning dictates.
  53  */
  54 extern uint64_t opal_btl_usnic_ticks;
  55 
  56 /* Lock for MPI_THREAD_MULTIPLE support */
  57 extern opal_recursive_mutex_t btl_usnic_lock;
  58 
  59 static inline uint64_t
  60 get_nsec(void)
  61 {
  62     return opal_btl_usnic_ticks;
  63 }
  64 
  65 /* RNG buffer declaration */
  66 extern opal_rng_buff_t opal_btl_usnic_rand_buff;
  67 
  68 #ifndef container_of
  69 #define container_of(ptr, type, member) ( \
  70         (type *)( ((char *)(ptr)) - offsetof(type,member) ))
  71 #endif
  72 
  73 #ifndef max
  74 #define max(a, b) (((a) > (b)) ? (a) : (b))
  75 #endif
  76 
  77 /* MSGDEBUG2 prints 1 line at each BTL entry point */
  78 #define MSGDEBUG2 (MSGDEBUG1||0)
  79 /* MSGDEBUG1 prints more info about arguments and internal functions */
  80 #define MSGDEBUG1 0
  81 
  82 /* output macros to declutter source */
  83 #if MSGDEBUG1
  84 #define MSGDEBUG1_OUT(...) opal_output(0, __VA_ARGS__)
  85 #else
  86 #define MSGDEBUG1_OUT(...) do {} while (0)
  87 #endif
  88 #if MSGDEBUG2
  89 #define MSGDEBUG2_OUT(...) opal_output(0, __VA_ARGS__)
  90 #else
  91 #define MSGDEBUG2_OUT(...) do {} while (0)
  92 #endif
  93 
  94 /* Set to >0 to randomly drop received frags.  The higher the number,
  95    the more frequent the drops. */
  96 #define WANT_RECV_DROPS 0
  97 /* Set to >0 to randomly fail to send an ACK, mimicing a lost ACK.
  98    The higher the number, the more frequent the failed-to-send-ACK. */
  99 #define WANT_FAIL_TO_SEND_ACK 0
 100 /* Set to >0 to randomly fail to resend a frag (causing it to be
 101    requed to be sent later).  The higher the number, the more frequent
 102    the failed-to-resend-frag. */
 103 #define WANT_FAIL_TO_RESEND_FRAG 0
 104 
 105 #if WANT_RECV_DROPS > 0
 106 #define FAKE_RECV_DROP (opal_rand(&opal_btl_usnic_rand_buff) < WANT_RECV_DROPS)
 107 #else
 108 #define FAKE_RECV_DROP 0
 109 #endif
 110 
 111 #if WANT_FAIL_TO_SEND_ACK > 0
 112 #define FAKE_FAIL_TO_SEND_ACK (opal_rand(&opal_btl_usnic_rand_buff) < WANT_FAIL_TO_SEND_ACK)
 113 #else
 114 #define FAKE_FAIL_TO_SEND_ACK 0
 115 #endif
 116 
 117 #if WANT_FAIL_TO_RESEND_FRAG > 0
 118 #define FAKE_FAIL_TO_RESEND_FRAG (opal_rand(&opal_btl_usnic_rand_buff) < WANT_FAIL_TO_RESEND_FRAG)
 119 #else
 120 #define FAKE_FAIL_TO_RESEND_FRAG 0
 121 #endif
 122 
 123 
 124 /**
 125  * usnic BTL component
 126  */
 127 typedef struct opal_btl_usnic_component_t {
 128     /** base BTL component */
 129     mca_btl_base_component_2_0_0_t super;
 130 
 131     /* in the v1.6 series, sizeof(super) is 256, leading to good alignment for
 132      * subsequent fastpath fields */
 133 
 134     /** Maximum number of BTL modules */
 135     int max_modules;
 136     /** Number of available/initialized BTL modules */
 137     int num_modules;
 138 
 139     /* Cached hashed version of my RTE proc name (to stuff in
 140        protocol headers) */
 141     uint64_t my_hashed_rte_name;
 142 
 143     /** array of possible BTLs (>= num_modules elements) */
 144     struct opal_btl_usnic_module_t* usnic_all_modules;
 145     /** array of pointers to active BTLs (num_modules elements) */
 146     struct opal_btl_usnic_module_t** usnic_active_modules;
 147 
 148     /** convertor packing threshold */
 149     int pack_lazy_threshold;
 150 
 151     /* vvvvvvvvvv non-fastpath fields go below vvvvvvvvvv */
 152 
 153     /** list of usnic proc structures */
 154     opal_list_t usnic_procs;
 155 
 156     /** memory pool hints */
 157     char* usnic_mpool_hints;
 158 
 159     /** registration cache name */
 160     char *usnic_rcache_name;
 161 
 162     char *if_include;
 163     char *if_exclude;
 164 
 165     /** Want stats? */
 166     bool stats_enabled;
 167     bool stats_relative;
 168     int stats_frequency;
 169 
 170     /** Whether we want to use NUMA distances to choose which usNIC
 171         devices to use for short messages */
 172     bool want_numa_device_assignment;
 173 
 174     /** max send descriptors to post per module */
 175     int32_t sd_num;
 176 
 177     /** max receive descriptors per module */
 178     int32_t rd_num;
 179 
 180     /** max send/receive desriptors for priority channel */
 181     int32_t prio_sd_num;
 182     int32_t prio_rd_num;
 183 
 184     /** max completion queue entries per module */
 185     int32_t cq_num;
 186 
 187     /** max number of entries in AV EQ */
 188     int32_t av_eq_num;
 189 
 190     /** retrans characteristics */
 191     int retrans_timeout;
 192 
 193     /** transport header length for all usNIC devices on this server
 194         (it is guaranteed that all usNIC devices on a single server
 195         will have the same underlying transport, and therefore the
 196         same transport header length) */
 197     int transport_header_len;
 198     uint32_t transport_protocol;
 199 
 200     /* what UDP port do we want to use?  If 0, the system will pick.
 201        If nonzero, it is used as the base -- the final number will be
 202        (base+my_local_rank). */
 203     int udp_port_base;
 204 
 205     /** disable the "cannot find route" warnings (for network setups
 206         where this is known/acceptable) */
 207     bool show_route_failures;
 208 
 209     /** connectivity verification: ACK timeout, number of retries
 210         before issue an error/abort the job */
 211     bool connectivity_enabled;
 212     int connectivity_ack_timeout;
 213     int connectivity_num_retries;
 214 
 215     /** how many short packets have to be received before outputting
 216         the "received short packets" warning? */
 217     uint32_t max_short_packets;
 218 
 219     /* Prefix for the connectivity map filename (map will be output if
 220        the prefix is non-NULL) */
 221     char *connectivity_map_prefix;
 222 
 223     /** Offset into the send buffer where the payload will go.  For
 224         libfabric v1.0.0 / API v1.0, this is 0.  For libfabric >=v1.1
 225         / API >=v1.1, this is the endpoint.msg_prefix_size (i.e.,
 226         component.transport_header_len). */
 227     uint32_t prefix_send_offset;
 228 
 229     /* OPAL async progress event base */
 230     opal_event_base_t *opal_evbase;
 231 } opal_btl_usnic_component_t;
 232 
 233 OPAL_MODULE_DECLSPEC extern opal_btl_usnic_component_t mca_btl_usnic_component;
 234 
 235 typedef mca_btl_base_recv_reg_t opal_btl_usnic_recv_reg_t;
 236 
 237 /**
 238  * Size for sequence numbers (just to ensure we use the same size
 239  * everywhere)
 240  */
 241 typedef uint16_t opal_btl_usnic_seq_t;
 242 #define UDSEQ PRIu16
 243 
 244 /* sequence number comparison macros that allow for rollover.
 245  * Relies on the fact that sequence numbers should be relatively close
 246  * together as compared to (1<<31)
 247  */
 248 #define SEQ_DIFF(A,B) ((int16_t)((A)-(B)))
 249 #define SEQ_LT(A,B) (SEQ_DIFF(A,B) < 0)
 250 #define SEQ_LE(A,B) (SEQ_DIFF(A,B) <= 0)
 251 #define SEQ_GT(A,B) (SEQ_DIFF(A,B) > 0)
 252 #define SEQ_GE(A,B) (SEQ_DIFF(A,B) >= 0)
 253 
 254 /**
 255  * Register the usnic BTL MCA params
 256  */
 257 int opal_btl_usnic_component_register(void);
 258 
 259 /**
 260  * Routine which can be called from a debugger to print module, endpoint,
 261  * fragment, and segment state to standard output. */
 262 void opal_btl_usnic_component_debug(void);
 263 
 264 /**
 265  * Called to output the connectivity map
 266  */
 267 void opal_btl_usnic_connectivity_map(void);
 268 
 269 END_C_DECLS
 270 #endif

/* [<][>][^][v][top][bottom][index][help] */