root/opal/mca/btl/tcp/btl_tcp.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2016 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2010-2011 Cisco Systems, Inc.  All rights reserved.
  14  * Copyright (c) 2014-2016 Research Organization for Information Science
  15  *                         and Technology (RIST). All rights reserved.
  16  * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
  17  *                         reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  */
  24 /**
  25  * @file
  26  */
  27 #ifndef MCA_BTL_TCP_H
  28 #define MCA_BTL_TCP_H
  29 
  30 #include "opal_config.h"
  31 #ifdef HAVE_SYS_TYPES_H
  32 #include <sys/types.h>
  33 #endif
  34 #ifdef HAVE_SYS_SOCKET_H
  35 #include <sys/socket.h>
  36 #endif
  37 #ifdef HAVE_NETINET_IN_H
  38 #include <netinet/in.h>
  39 #endif
  40 #ifdef HAVE_UNISTD_H
  41 #include <unistd.h>
  42 #endif
  43 
  44 /* Open MPI includes */
  45 #include "opal/mca/event/event.h"
  46 #include "opal/class/opal_free_list.h"
  47 #include "opal/mca/btl/btl.h"
  48 #include "opal/mca/btl/base/base.h"
  49 #include "opal/mca/mpool/mpool.h"
  50 #include "opal/class/opal_hash_table.h"
  51 #include "opal/util/fd.h"
  52 
   53 #define MCA_BTL_TCP_STATISTICS 0 /* non-zero compiles the tcp_bytes_sent / tcp_bytes_recv / tcp_send_handler counters into struct mca_btl_tcp_module_t */
  54 BEGIN_C_DECLS
  55 
   56 extern opal_event_base_t* mca_btl_tcp_event_base; /* NOTE(review): defined outside this header; presumably the event base the TCP BTL registers its events with -- confirm */
  57 
  58 #define MCA_BTL_TCP_COMPLETE_FRAG_SEND(frag)                            \
  59     do {                                                                \
  60         int btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); \
  61         if( frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK ) { \
  62             frag->base.des_cbfunc(&frag->endpoint->endpoint_btl->super, frag->endpoint, \
  63                                   &frag->base, frag->rc);               \
  64         }                                                               \
  65         if( btl_ownership ) {                                           \
  66             MCA_BTL_TCP_FRAG_RETURN(frag);                              \
  67         }                                                               \
  68     } while (0)
  69 #define MCA_BTL_TCP_RECV_TRIGGER_CB(frag)                               \
  70     do {                                                                \
  71         if( MCA_BTL_TCP_HDR_TYPE_SEND == frag->hdr.type ) {             \
  72             mca_btl_active_message_callback_t* reg;                     \
  73             reg = mca_btl_base_active_message_trigger + frag->hdr.base.tag; \
  74             reg->cbfunc(&frag->endpoint->endpoint_btl->super, frag->hdr.base.tag, &frag->base, reg->cbdata); \
  75         }                                                               \
  76     } while (0)
  77 
   78 extern opal_list_t mca_btl_tcp_ready_frag_pending_queue; /* NOTE(review): not used in this header; presumably fragments queued for later processing -- confirm against the .c files */
   79 extern opal_mutex_t mca_btl_tcp_ready_frag_mutex;        /* NOTE(review): presumably guards mca_btl_tcp_ready_frag_pending_queue -- confirm */
   80 extern int mca_btl_tcp_pipe_to_progress[2];              /* element [1] is written by MCA_BTL_TCP_ACTIVATE_EVENT with an opal_event_t pointer */
   81 extern int mca_btl_tcp_progress_thread_trigger;          /* when > 0, MCA_BTL_TCP_ACTIVATE_EVENT writes to the pipe instead of calling opal_event_add */
  82 
  83 #define MCA_BTL_TCP_CRITICAL_SECTION_ENTER(name) \
  84     opal_mutex_atomic_lock((name))
  85 #define MCA_BTL_TCP_CRITICAL_SECTION_LEAVE(name) \
  86     opal_mutex_atomic_unlock((name))
  87 
  88 #define MCA_BTL_TCP_ACTIVATE_EVENT(event, value)                        \
  89     do {                                                                \
  90         if(0 < mca_btl_tcp_progress_thread_trigger) {                   \
  91             opal_event_t* _event = (opal_event_t*)(event);                  \
  92             (void) opal_fd_write( mca_btl_tcp_pipe_to_progress[1], sizeof(opal_event_t*), \
  93                            &_event);                                        \
  94         }                                                                   \
  95         else {                                                          \
  96             opal_event_add(event, (value));                             \
  97         }                                                               \
  98     } while (0)
  99 
 100 /**
 101  * TCP BTL component.
 102  */
 103 
  104 struct mca_btl_tcp_component_t {
  105     mca_btl_base_component_3_0_0_t super;   /**< base BTL component */
  106     uint32_t tcp_addr_count;                /**< total number of addresses */
  107     uint32_t tcp_num_btls;                  /**< number of interfaces available to the TCP component */
  108     unsigned int tcp_num_links;             /**< number of logical links per physical device */
  109     struct mca_btl_tcp_module_t **tcp_btls; /**< array of available BTL modules */
  110     int tcp_free_list_num;                  /**< initial size of free lists */
  111     int tcp_free_list_max;                  /**< maximum size of free lists */
  112     int tcp_free_list_inc;                  /**< number of elements to alloc when growing free lists */
  113     int tcp_endpoint_cache;                 /**< amount of cache on each endpoint */
  114     opal_proc_table_t tcp_procs;            /**< hash table of tcp proc structures */
  115     opal_mutex_t tcp_lock;                  /**< lock for accessing module state */
  116     opal_list_t tcp_events;                 /**< NOTE(review): undocumented list; contents not visible from this header -- confirm */
  117 
  118     opal_event_t tcp_recv_event;            /**< recv event for IPv4 listen socket */
  119     int tcp_listen_sd;                      /**< IPv4 listen socket for incoming connection requests */
  120     unsigned short tcp_listen_port;         /**< IPv4 listen port */
  121     int tcp_port_min;                       /**< IPv4 minimum port */
  122     int tcp_port_range;                     /**< IPv4 port range */
  123 #if OPAL_ENABLE_IPV6
  124     opal_event_t tcp6_recv_event;           /**< recv event for IPv6 listen socket */
  125     int tcp6_listen_sd;                     /**< IPv6 listen socket for incoming connection requests */
  126     unsigned short tcp6_listen_port;        /**< IPv6 listen port */
  127     int tcp6_port_min;                      /**< IPv6 minimum port */
  128     int tcp6_port_range;                    /**< IPv6 port range */
  129 #endif
  130     /* Port range restriction: see the *_port_min / *_port_range fields above */
  131 
  132     char*  tcp_if_include;                  /**< comma separated list of interfaces to include */
  133     char*  tcp_if_exclude;                  /**< comma separated list of interfaces to exclude */
  134     int    tcp_sndbuf;                      /**< socket sndbuf size */
  135     int    tcp_rcvbuf;                      /**< socket rcvbuf size */
  136     int    tcp_disable_family;              /**< disabled AF_family */
  137 
  138     /* free lists of fragment descriptors */
  139     opal_free_list_t tcp_frag_eager;
  140     opal_free_list_t tcp_frag_max;
  141     opal_free_list_t tcp_frag_user;
  142 
  143     int tcp_enable_progress_thread;         /**< Support for tcp progress thread flag */
  144 
  145     opal_event_t tcp_recv_thread_async_event; /**< NOTE(review): undocumented; presumably related to the progress thread -- confirm */
  146     opal_mutex_t tcp_frag_eager_mutex;      /**< NOTE(review): presumably guards tcp_frag_eager -- confirm */
  147     opal_mutex_t tcp_frag_max_mutex;        /**< NOTE(review): presumably guards tcp_frag_max -- confirm */
  148     opal_mutex_t tcp_frag_user_mutex;       /**< NOTE(review): presumably guards tcp_frag_user -- confirm */
  149     /* Do we want to use TCP_NODELAY? NOTE(review): the field name suggests a non-zero value means NOT using it -- confirm */
  150     int    tcp_not_use_nodelay;
  151 
  152     /* do we want to warn on all excluded interfaces
  153      * that are not found?
  154      */
  155     bool report_all_unfound_interfaces;
  156 };
  157 typedef struct mca_btl_tcp_component_t mca_btl_tcp_component_t;
 158 
 159 OPAL_MODULE_DECLSPEC extern mca_btl_tcp_component_t mca_btl_tcp_component;
 160 
 161 /**
 162  * BTL Module Interface
 163  */
  164 struct mca_btl_tcp_module_t {
  165     mca_btl_base_module_t  super;  /**< base BTL interface */
  166     uint16_t           tcp_ifkindex; /**< BTL kernel interface index */
  167     struct sockaddr_storage tcp_ifaddr;   /**< First address
  168                                              discovered for this
  169                                              interface, bound as
  170                                              sending address for this
  171                                              BTL */
  172     uint32_t           tcp_ifmask;  /**< BTL interface netmask */
  173 
  174     opal_mutex_t       tcp_endpoints_mutex; /**< NOTE(review): presumably guards tcp_endpoints -- confirm */
  175     opal_list_t        tcp_endpoints;       /**< NOTE(review): presumably the endpoints attached to this module -- confirm */
  176 
  177     mca_btl_base_module_error_cb_fn_t tcp_error_cb;  /**< Upper layer error callback */
  178 #if MCA_BTL_TCP_STATISTICS
      /* The counters below exist only when MCA_BTL_TCP_STATISTICS is non-zero. */
  179     size_t tcp_bytes_sent;
  180     size_t tcp_bytes_recv;
  181     size_t tcp_send_handler;
  182 #endif
  183 };
  184 typedef struct mca_btl_tcp_module_t mca_btl_tcp_module_t;
  185 extern mca_btl_tcp_module_t mca_btl_tcp_module; /* module instance defined outside this header */
 186 
 187 #define CLOSE_THE_SOCKET(socket)   {(void)shutdown(socket, SHUT_RDWR); (void)close(socket);}
 188 
  189 /**
  190  * TCP component initialization.
  191  *
  192  * @param num_btl_modules (OUT)           Number of BTLs returned in BTL array.
  193  * @param allow_multi_user_threads (IN)   Flag indicating whether BTL supports user threads (TRUE)
  194  * @param have_hidden_threads (IN)        Flag indicating whether BTL uses threads (TRUE)
       *
       * NOTE(review): both flags are passed by value, so they cannot carry
       * output; they were previously documented as (OUT) -- confirm the
       * intended meaning against the implementation.
       *
       * @return BTL array (the modules counted by num_btl_modules).
  195  */
  196 extern mca_btl_base_module_t** mca_btl_tcp_component_init(
  197     int *num_btl_modules,
  198     bool allow_multi_user_threads,
  199     bool have_hidden_threads
  200 );
 201 
 202 
  203 /**
  204  * Clean up any resources held by the BTL.
  205  *
  206  * @param btl (IN)  BTL instance.
  207  * @return          OPAL_SUCCESS or error status on failure.
  208  */
  209 
  210 extern int mca_btl_tcp_finalize(
  211     struct mca_btl_base_module_t* btl
  212 );
 213 
 214 
  215 /**
  216  * PML->BTL notification of change in the process list.
  217  *
  218  * @param btl (IN)            BTL instance
  219  * @param nprocs (IN)         Number of processes
  220  * @param procs (IN)          Set of processes
  221  * @param peers (OUT)         Set of (optional) peer addressing info.
  222  * @param reachable (IN/OUT)  Set of processes that are reachable via this BTL.
  223  * @return     OPAL_SUCCESS or error status on failure.
  224  *
  225  */
  226 
  227 extern int mca_btl_tcp_add_procs(
  228     struct mca_btl_base_module_t* btl,
  229     size_t nprocs,
  230     struct opal_proc_t **procs,
  231     struct mca_btl_base_endpoint_t** peers,
  232     opal_bitmap_t* reachable
  233 );
 234 
  235 /**
  236  * PML->BTL notification of change in the process list.
  237  *
  238  * @param btl (IN)     BTL instance
  239  * @param nprocs (IN)  Number of processes.
  240  * @param procs (IN)   Set of processes.
  241  * @param peers (IN)   Set of peer data structures.
  242  * @return             Status indicating if cleanup was successful
  243  *
  244  */
  245 
  246 extern int mca_btl_tcp_del_procs(
  247     struct mca_btl_base_module_t* btl,
  248     size_t nprocs,
  249     struct opal_proc_t **procs,
  250     struct mca_btl_base_endpoint_t** peers
  251 );
 252 
 253 
  254 /**
  255  * Initiate an asynchronous send.
  256  *
  257  * @param btl (IN)         BTL module
  258  * @param btl_peer (IN)    BTL addressing information
  259  * @param descriptor (IN)  Description of the data to be transferred
  260  * @param tag (IN)         The tag value used to notify the peer.
       * @return                 NOTE(review): int status; the meaning of each
       *                         value is not visible from this header -- confirm.
  261  */
  262 
  263 extern int mca_btl_tcp_send(
  264     struct mca_btl_base_module_t* btl,
  265     struct mca_btl_base_endpoint_t* btl_peer,
  266     struct mca_btl_base_descriptor_t* descriptor,
  267     mca_btl_base_tag_t tag
  268 );
 269 
 270 
  271 /**
  272  * Initiate an asynchronous put.
       *
       * NOTE(review): the parameters are not documented here; presumably they
       * follow the BTL framework's put entry point declared in
       * opal/mca/btl/btl.h -- confirm there before relying on their meaning.
  273  */
  274 
  275 int mca_btl_tcp_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
  276                      uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
  277                      mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
  278                      int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
 279 
 280 
  281 /**
  282  * Initiate an asynchronous get.
       *
       * NOTE(review): the parameters are not documented here; presumably they
       * follow the BTL framework's get entry point declared in
       * opal/mca/btl/btl.h -- confirm there before relying on their meaning.
  283  */
  284 
  285 int mca_btl_tcp_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
  286                      uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
  287                      mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
  288                      int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
 289 
  290 /**
  291  * Allocate a descriptor with a segment of the requested size.
  292  * Note that the BTL layer may choose to return a smaller size
  293  * if it cannot support the request.
  294  *
  295  * @param btl (IN)      BTL module
       * @param endpoint (IN) NOTE(review): undocumented; presumably the peer the
       *                      descriptor is intended for -- confirm
       * @param order (IN)    NOTE(review): undocumented; semantics defined by the
       *                      BTL framework -- confirm
  296  * @param size (IN)     Requested segment size.
       * @param flags (IN)    NOTE(review): undocumented; presumably descriptor
       *                      flags (MCA_BTL_DES_*) -- confirm
  297  */
  298 
  299 extern mca_btl_base_descriptor_t* mca_btl_tcp_alloc(
  300     struct mca_btl_base_module_t* btl,
  301     struct mca_btl_base_endpoint_t* endpoint,
  302     uint8_t order,
  303     size_t size,
  304     uint32_t flags);
 305 
 306 
  307 /**
  308  * Return a segment allocated by this BTL.
  309  *
  310  * @param btl (IN)  BTL module
  311  * @param des (IN)  Allocated descriptor.
  312  */
  313 
  314 extern int mca_btl_tcp_free(
  315     struct mca_btl_base_module_t* btl,
  316     mca_btl_base_descriptor_t* des);
 317 
 318 
  319 /**
  320  * Prepare a descriptor for send/rdma using the supplied
  321  * convertor. If the convertor references data that is contiguous,
  322  * the descriptor may simply point to the user buffer. Otherwise,
  323  * this routine is responsible for allocating buffer space and
  324  * packing if required.
  325  *
  326  * @param btl (IN)          BTL module
  327  * @param peer (IN)         BTL peer addressing
  328  * @param convertor (IN)    Data type convertor
       * @param order (IN)        NOTE(review): undocumented; semantics defined by
       *                          the BTL framework -- confirm
  329  * @param reserve (IN)      Additional bytes requested by upper layer to precede user data
  330  * @param size (IN/OUT)     Number of bytes to prepare (IN), number of bytes actually prepared (OUT)
       * @param flags (IN)        NOTE(review): undocumented; presumably descriptor
       *                          flags (MCA_BTL_DES_*) -- confirm
  331 */
  332 
  333 mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src(
  334     struct mca_btl_base_module_t* btl,
  335     struct mca_btl_base_endpoint_t* peer,
  336     struct opal_convertor_t* convertor,
  337     uint8_t order,
  338     size_t reserve,
  339     size_t* size,
  340     uint32_t flags
  341 );
  342 
      /*
       * NOTE(review): undocumented in this header. Judging by the name only,
       * this presumably reports diagnostic state for the given BTL/endpoint,
       * with `verbose` selecting the level of detail -- confirm against the
       * implementation.
       */
  343 extern void
  344 mca_btl_tcp_dump(struct mca_btl_base_module_t* btl,
  345                  struct mca_btl_base_endpoint_t* endpoint,
  346                  int verbose);
 347 
  348 /**
  349   * Fault Tolerance Event Notification Function
  350   * @param state Checkpoint State
  351   * @return OPAL_SUCCESS or failure status
  352   */
  353 int mca_btl_tcp_ft_event(int state);
 354 
  355 /*
  356  * A blocking send on a non-blocking socket. Used to send the small
  357  * amount of connection information that identifies the
  358  * endpoints.
  359  */
  360 int mca_btl_tcp_send_blocking(int sd, const void* data, size_t size);
 361 
  362 /*
  363  * A blocking recv that works on both blocking and non-blocking sockets.
  364  * Used to receive the small amount of connection information
  365  * that identifies the endpoints.
  366  *
  367  * The socket is blocking (with a timeout introduced by the caller)
  368  * during the initial handshake; otherwise the socket is
  369  * non-blocking most of the time.
  370  */
  371 int mca_btl_tcp_recv_blocking(int sd, void* data, size_t size);
 372 
 373 END_C_DECLS
 374 #endif

/* [<][>][^][v][top][bottom][index][help] */