root/opal/mca/btl/ofi/btl_ofi.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. mca_btl_ofi_context_trylock
  2. mca_btl_ofi_context_lock
  3. mca_btl_ofi_context_unlock

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2018 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
  14  *                         reserved.
  15  * Copyright (c) 2018      Intel, Inc, All rights reserved
  16  * $COPYRIGHT$
  17  *
  18  * Additional copyrights may follow
  19  *
  20  * $HEADER$
  21  */
  22 /**
  23  * @file
  24  */
  25 #ifndef MCA_BTL_OFI_H
  26 #define MCA_BTL_OFI_H
  27 
  28 #include "opal_config.h"
  29 #include <sys/types.h>
  30 #include <string.h>
  31 
  32 /* Open MPI includes */
  33 #include "opal/mca/event/event.h"
  34 #include "opal/mca/btl/btl.h"
  35 #include "opal/mca/btl/base/base.h"
  36 #include "opal/mca/mpool/mpool.h"
  37 #include "opal/mca/btl/base/btl_base_error.h"
  38 #include "opal/mca/rcache/base/base.h"
  39 #include "opal/mca/pmix/pmix.h"
  40 
  41 #include "opal/class/opal_hash_table.h"
  42 
  43 #include <rdma/fabric.h>
  44 #include <rdma/fi_domain.h>
  45 #include <rdma/fi_errno.h>
  46 #include <rdma/fi_cm.h>
  47 #include <rdma/fi_endpoint.h>
  48 #include <rdma/fi_rma.h>
  49 
  50 BEGIN_C_DECLS
     /* Capacity of the modules[] array in mca_btl_ofi_component_t. */
  51 #define MCA_BTL_OFI_MAX_MODULES         16
     /* NOTE(review): presumably the default for the component's num_cqe_read
      * setting; its use is not visible in this header -- confirm. */
  52 #define MCA_BTL_OFI_NUM_CQE_READ        64
  53 
     /* NOTE(review): judging by their names these are defaults for tunables
      * such as rd_num and progress_threshold in mca_btl_ofi_component_t; where
      * they are applied is not visible in this header -- confirm. */
  54 #define MCA_BTL_OFI_DEFAULT_RD_NUM              10
  55 #define MCA_BTL_OFI_DEFAULT_MAX_CQE             128
  56 #define MCA_BTL_OFI_DEFAULT_PROGRESS_THRESHOLD  64
  57 
     /* Expands to a call of mca_btl_ofi_exit() with the macro argument pasted
      * in verbatim.  mca_btl_ofi_exit() is declared in this header as taking no
      * parameters, so a non-empty argument would not match that prototype. */
  58 #define MCA_BTL_OFI_ABORT(args)     mca_btl_ofi_exit(args)
  59 
     /* Shorthand for the two_sided_enabled flag of the global component
      * instance mca_btl_ofi_component. */
  60 #define TWO_SIDED_ENABLED           mca_btl_ofi_component.two_sided_enabled
  61 
     /**
      * @brief Operating modes of the OFI BTL
      *
      * Enumerators are numbered consecutively from 0, so the trailing
      * MCA_BTL_OFI_MODE_TOTAL equals the number of real modes.
      *
      * NOTE(review): presumably the values held in mca_btl_ofi_component_t::mode
      * and passed to mca_btl_ofi_module_alloc() -- confirm against the callers.
      */
  62 enum mca_btl_ofi_mode {
  63     MCA_BTL_OFI_MODE_ONE_SIDED = 0,
  64     MCA_BTL_OFI_MODE_TWO_SIDED,
  65     MCA_BTL_OFI_MODE_FULL_SUPPORT,
         /* count of the modes above; not a mode itself */
  66     MCA_BTL_OFI_MODE_TOTAL
  67 };
  68 
     /**
      * @brief Operation type tags
      *
      * Enumerators are numbered consecutively from 0, so the trailing
      * MCA_BTL_OFI_TYPE_TOTAL equals the number of real types.
      *
      * NOTE(review): presumably the values stored in
      * mca_btl_ofi_base_completion_t::type (declared as int) -- confirm.
      */
  69 enum mca_btl_ofi_hdr_type {
  70     MCA_BTL_OFI_TYPE_PUT = 0,
  71     MCA_BTL_OFI_TYPE_GET,
  72     MCA_BTL_OFI_TYPE_AOP,
  73     MCA_BTL_OFI_TYPE_AFOP,
  74     MCA_BTL_OFI_TYPE_CSWAP,
  75     MCA_BTL_OFI_TYPE_SEND,
  76     MCA_BTL_OFI_TYPE_RECV,
         /* count of the types above; not a type itself */
  77     MCA_BTL_OFI_TYPE_TOTAL
  78 };
  79 
     /**
      * @brief Per-context state of the OFI BTL
      *
      * Groups two libfabric endpoint handles, a completion queue, three free
      * lists and the lock word operated on by mca_btl_ofi_context_trylock(),
      * mca_btl_ofi_context_lock() and mca_btl_ofi_context_unlock().
      */
  80 struct mca_btl_ofi_context_t {
  81     int32_t context_id;
  82 
  83     /* transmit and receive contexts */
  84     struct fid_ep *tx_ctx;
  85     struct fid_ep *rx_ctx;
  86 
  87     /* completion queue */
  88     struct fid_cq *cq;
  89 
  90     /* completion info and fragment freelists */
  91     /* We have it per context to reduce the thread contention
  92      * on the freelist. Things can get really slow. */
  93     opal_free_list_t rdma_comp_list;
  94     opal_free_list_t frag_comp_list;
  95     opal_free_list_t frag_list;
  96 
  97     /* for thread locking: mca_btl_ofi_context_trylock() swaps in 1 and
          * mca_btl_ofi_context_unlock() stores 0 */
  98     opal_atomic_int32_t lock;
  99 };
 100 typedef struct mca_btl_ofi_context_t mca_btl_ofi_context_t;
 101 
 102 /**
 103  * @brief OFI BTL module
      *
      * Extends mca_btl_base_module_t (first member, so a pointer to this
      * structure can be passed where the base module type is expected) with
      * the libfabric handles, the per-module contexts and bookkeeping.
 104  */
 105 struct mca_btl_ofi_module_t {
 106     /** base BTL interface */
 107     mca_btl_base_module_t super;
 108 
 109     /* libfabric components */
 110     struct fi_info *fabric_info;
 111     struct fid_fabric *fabric;
 112     struct fid_domain *domain;
 113     struct fid_ep *ofi_endpoint;
 114     struct fid_av *av;
 115 
         /* NOTE(review): presumably contexts points to an array of
          * num_contexts entries -- confirm against the allocation code */
 116     int num_contexts;
 117     mca_btl_ofi_context_t *contexts;
 118 
 119     char *linux_device_name;
 120 
 121     /** whether the module has been fully initialized or not */
 122     bool initialized;
 123     bool use_virt_addr;
 124     bool is_scalable_ep;
 125 
         /* NOTE(review): by name, counters of in-flight RDMA and send
          * operations; how they are updated is not visible here -- confirm */
 126     opal_atomic_int64_t outstanding_rdma;
 127     opal_atomic_int64_t outstanding_send;
 128 
 129     /** linked list of BTL endpoints. this list is never searched so
 130      * there is no need for a complicated structure here at this time */
 131     opal_list_t endpoints;
 132 
         /* NOTE(review): which members module_lock protects is not visible in
          * this header -- confirm before relying on it */
 133     opal_mutex_t module_lock;
 134     opal_hash_table_t id_to_endpoint;
 135 
 136     /** registration cache */
 137     mca_rcache_base_module_t *rcache;
 138 };
 139 typedef struct mca_btl_ofi_module_t mca_btl_ofi_module_t;
 140 
     /* Module instance defined in another translation unit.
      * NOTE(review): the name suggests new modules are initialized from it
      * -- confirm. */
 141 extern mca_btl_ofi_module_t mca_btl_ofi_module_template;
 142 
 143 /**
 144  * @brief OFI BTL component
      *
      * Component-wide settings plus the table of modules.  A single global
      * instance, mca_btl_ofi_component, is declared right after the typedef.
 145  */
 146 struct mca_btl_ofi_component_t {
 147     mca_btl_base_component_3_0_0_t super;  /**< base BTL component */
 148 
 149     /** number of TL modules */
 150     int module_count;
 151     int num_contexts_per_module;
 152     int num_cqe_read;
 153     int progress_threshold;
         /* NOTE(review): presumably one of enum mca_btl_ofi_mode -- confirm */
 154     int mode;
 155     int rd_num;
         /* read through the TWO_SIDED_ENABLED macro */
 156     bool two_sided_enabled;
 157 
 158     size_t namelen;
 159 
 160     /** All BTL OFI modules (1 per tl); capacity MCA_BTL_OFI_MAX_MODULES */
 161     mca_btl_ofi_module_t *modules[MCA_BTL_OFI_MAX_MODULES];
 162 
 163 };
 164 typedef struct mca_btl_ofi_component_t mca_btl_ofi_component_t;
 165 
     /* The global component instance; defined in another translation unit. */
 166 OPAL_MODULE_DECLSPEC extern mca_btl_ofi_component_t mca_btl_ofi_component;
 167 
     /**
      * @brief Registration handle as defined by the OFI BTL
      *
      * Embedded by value in mca_btl_ofi_reg_t as its "remote handle" and passed
      * by pointer to the put/get/atomic entry points declared in this header.
      *
      * NOTE(review): by name, rkey is the remote key, desc the local
      * descriptor and base_addr the start of the registered region -- confirm
      * against the registration code (mca_btl_ofi_reg_mem).
      */
 168 struct mca_btl_base_registration_handle_t {
 169     uint64_t rkey;
 170     void *desc;
 171     void *base_addr;
 172 };
 173 
     /**
      * @brief Registration record of the OFI BTL
      *
      * Extends mca_rcache_base_registration_t (first member) with a libfabric
      * memory-region handle and the registration handle embedded by value.
      */
 174 struct mca_btl_ofi_reg_t {
 175     mca_rcache_base_registration_t base;
 176     struct fid_mr *ur_mr;
 177 
 178     /* remote handle */
 179     mca_btl_base_registration_handle_t handle;
 180 };
 181 typedef struct mca_btl_ofi_reg_t mca_btl_ofi_reg_t;
 182 
 183 OBJ_CLASS_DECLARATION(mca_btl_ofi_reg_t);
 184 
     /**
      * @brief Fragment header: a BTL tag plus a length
      *
      * Embedded by value as the hdr member of mca_btl_ofi_base_frag_t.
      * NOTE(review): presumably len is the payload length in bytes -- confirm.
      */
 185 struct mca_btl_ofi_header_t {
 186     mca_btl_base_tag_t tag;
 187     size_t len;
 188 };
 189 typedef struct mca_btl_ofi_header_t mca_btl_ofi_header_t;
 190 
     /**
      * @brief Fragment descriptor of the OFI BTL
      *
      * Extends mca_btl_base_descriptor_t (first member) with two segments, the
      * owning module and endpoint, a context id, a free-list pointer and the
      * embedded header.
      */
 191 struct mca_btl_ofi_base_frag_t {
 192     mca_btl_base_descriptor_t base;
 193     mca_btl_base_segment_t segments[2];
 194 
 195     int context_id;
 196     struct mca_btl_ofi_module_t *btl;
 197     struct mca_btl_base_endpoint_t *endpoint;
         /* NOTE(review): presumably the list this fragment is returned to when
          * released -- confirm */
 198     opal_free_list_t *free_list;
 199     mca_btl_ofi_header_t hdr;
 200 };
 201 
 202 typedef struct mca_btl_ofi_base_frag_t mca_btl_ofi_base_frag_t;
 203 
 204 OBJ_CLASS_DECLARATION(mca_btl_ofi_base_frag_t);
 205 
 206 
     /**
      * @brief A libfabric struct fi_context paired with an untyped pointer
      *
      * Embedded as the comp_ctx member of mca_btl_ofi_rdma_completion_t and
      * mca_btl_ofi_frag_completion_t.
      * NOTE(review): presumably comp points back at the completion structure
      * that embeds this context -- confirm against the code that sets it.
      */
 207 struct mca_btl_ofi_completion_context_t {
 208     struct fi_context ctx;
 209     void *comp;
 210 };
 211 
 212 typedef struct mca_btl_ofi_completion_context_t mca_btl_ofi_completion_context_t;
 213 
 214 /* Completion structure: stores the information needed
 215  * for RDMA callbacks.  It is the first member of both
      * mca_btl_ofi_rdma_completion_t and mca_btl_ofi_frag_completion_t. */
 216 struct mca_btl_ofi_base_completion_t {
 217     opal_free_list_item_t comp_list;
 218 
         /* NOTE(review): presumably the free list this item is returned to
          * -- confirm */
 219     opal_free_list_t *my_list;
 220 
 221     struct mca_btl_base_module_t *btl;
 222     struct mca_btl_base_endpoint_t *endpoint;
 223     struct mca_btl_ofi_context_t *my_context;
         /* NOTE(review): presumably one of enum mca_btl_ofi_hdr_type
          * -- confirm */
 224     int type;
 225 };
 226 typedef struct mca_btl_ofi_base_completion_t mca_btl_ofi_base_completion_t;
 227 
     /**
      * @brief Completion record for RDMA-style operations
      *
      * Extends mca_btl_ofi_base_completion_t (first member) with a completion
      * context, the local buffer and handle, two 64-bit values and the callback
      * triple (cbfunc, cbcontext, cbdata) that mirrors the trailing parameters
      * of the put/get/atomic entry points declared in this header.
      */
 228 struct mca_btl_ofi_rdma_completion_t {
 229     mca_btl_ofi_base_completion_t base;
 230     mca_btl_ofi_completion_context_t comp_ctx;
 231     void *local_address;
 232     mca_btl_base_registration_handle_t *local_handle;
 233 
         /* NOTE(review): presumably storage for the operands of the atomic
          * operations -- confirm */
 234     uint64_t operand;
 235     uint64_t compare;
 236 
 237     mca_btl_base_rdma_completion_fn_t cbfunc;
 238     void *cbcontext;
 239     void *cbdata;
 240 };
 241 typedef struct mca_btl_ofi_rdma_completion_t mca_btl_ofi_rdma_completion_t;
 242 
     /**
      * @brief Completion record that refers to a fragment
      *
      * Extends mca_btl_ofi_base_completion_t (first member) with a completion
      * context and a pointer to the associated mca_btl_ofi_base_frag_t.
      */
 243 struct mca_btl_ofi_frag_completion_t {
 244     mca_btl_ofi_base_completion_t base;
 245     mca_btl_ofi_completion_context_t comp_ctx;
 246     mca_btl_ofi_base_frag_t *frag;
 247 };
 248 typedef struct mca_btl_ofi_frag_completion_t mca_btl_ofi_frag_completion_t;
 249 
 250 OBJ_CLASS_DECLARATION(mca_btl_ofi_rdma_completion_t);
 251 OBJ_CLASS_DECLARATION(mca_btl_ofi_frag_completion_t);
 252 
 253 /**
 254  * Initiate an asynchronous put.
 255  * Completion Semantics: if this function returns a 1 then the operation
 256  *                       is complete. a return of OPAL_SUCCESS indicates
 257  *                       the put operation has been queued with the
 258  *                       network. the local_handle can not be deregistered
 259  *                       until all outstanding operations on that handle
 260  *                       have been completed.
 261  *
 262  * @param btl (IN)            BTL module
 263  * @param endpoint (IN)       BTL addressing information
 264  * @param local_address (IN)  Local address to put from (registered)
 265  * @param remote_address (IN) Remote address to put to (registered remotely)
 266  * @param local_handle (IN)   Registration handle for region containing
 267  *                            (local_address, local_address + size)
 268  * @param remote_handle (IN)  Remote registration handle for region containing
 269  *                            (remote_address, remote_address + size)
 270  * @param size (IN)           Number of bytes to put
 271  * @param flags (IN)          Flags for this put operation
 272  * @param order (IN)          Ordering
 273  * @param cbfunc (IN)         Function to call on completion (if queued)
 274  * @param cbcontext (IN)      Context for the callback
 275  * @param cbdata (IN)         Data for callback
 276  *
      * @retval 1               The operation is already complete (see Completion
      *                         Semantics)
 277  * @retval OPAL_SUCCESS    The descriptor was successfully queued for a put
 278  * @retval OPAL_ERROR      The descriptor was NOT successfully queued for a put
 279  * @retval OPAL_ERR_OUT_OF_RESOURCE  Insufficient resources to queue the put
 280  *                         operation. Try again later
 281  * @retval OPAL_ERR_NOT_AVAILABLE  Put can not be performed due to size or
 282  *                         alignment restrictions.
 283  */
 284 int mca_btl_ofi_put (struct mca_btl_base_module_t *btl,
 285     struct mca_btl_base_endpoint_t *endpoint, void *local_address,
 286     uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle,
 287     struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
 288     int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
 289 
 290 /**
 291  * Initiate an asynchronous get.
 292  * Completion Semantics: if this function returns a 1 then the operation
 293  *                       is complete. a return of OPAL_SUCCESS indicates
 294  *                       the get operation has been queued with the
 295  *                       network. the local_handle can not be deregistered
 296  *                       until all outstanding operations on that handle
 297  *                       have been completed.
 298  *
 299  * @param btl (IN)            BTL module
 300  * @param endpoint (IN)       BTL addressing information
 301  * @param local_address (IN)  Local address for the get (registered)
 302  * @param remote_address (IN) Remote address to get from (registered remotely)
 303  * @param local_handle (IN)   Registration handle for region containing
 304  *                            (local_address, local_address + size)
 305  * @param remote_handle (IN)  Remote registration handle for region containing
 306  *                            (remote_address, remote_address + size)
 307  * @param size (IN)           Number of bytes to get
 308  * @param flags (IN)          Flags for this get operation
 309  * @param order (IN)          Ordering
 310  * @param cbfunc (IN)         Function to call on completion (if queued)
 311  * @param cbcontext (IN)      Context for the callback
 312  * @param cbdata (IN)         Data for callback
 313  *
      * @retval 1               The operation is already complete (see Completion
      *                         Semantics)
 314  * @retval OPAL_SUCCESS    The descriptor was successfully queued for a get
 315  * @retval OPAL_ERROR      The descriptor was NOT successfully queued for a get
 316  * @retval OPAL_ERR_OUT_OF_RESOURCE  Insufficient resources to queue the get
 317  *                         operation. Try again later
 318  * @retval OPAL_ERR_NOT_AVAILABLE  Get can not be performed due to size or
 319  *                         alignment restrictions.
 320  */
 321 int mca_btl_ofi_get (struct mca_btl_base_module_t *btl,
 322     struct mca_btl_base_endpoint_t *endpoint, void *local_address,
 323     uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle,
 324     struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
 325     int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
 326 
     /*
      * Atomic entry points.  All three take the target as (remote_address,
      * remote_handle) and end with the same (flags, order, cbfunc, cbcontext,
      * cbdata) parameters as mca_btl_ofi_put() and mca_btl_ofi_get().
      *
      * NOTE(review): the descriptions below are inferred from the names and
      * signatures only; the implementations are not part of this header --
      * confirm before relying on them.
      */

     /* Presumably an atomic operation `op` with `operand` applied to the remote
      * location, with no result returned to the caller (no local buffer in the
      * signature). */
 327 int mca_btl_ofi_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
 328                      uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
 329                      mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
 330                      mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
 331 
     /* Presumably the fetching variant of mca_btl_ofi_aop(): additionally takes
      * a local buffer (local_address, local_handle). */
 332 int mca_btl_ofi_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
 333                       void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
 334                       mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
 335                       uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
 336                       void *cbcontext, void *cbdata);
 337 
     /* Presumably an atomic compare-and-swap on the remote location using
      * `compare` and `value`; takes a local buffer like mca_btl_ofi_afop(). */
 338 int mca_btl_ofi_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
 339                         void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
 340                         mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags,
 341                         int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
 342 
 343 
     /*
      * Remaining entry points of the OFI BTL; all are implemented in other
      * translation units.
      * NOTE(review): the one-line descriptions below are inferred from names
      * and signatures only -- confirm against the implementations.
      */

     /* Presumably waits for / forces completion of outstanding operations on
      * the given module and endpoint. */
 344 int mca_btl_ofi_flush (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint);
 345 
     /* Presumably tears down the given module. */
 346 int mca_btl_ofi_finalize (mca_btl_base_module_t *btl);
 347 
     /* Registration cache setup and the register/deregister routines; reg_data
      * is an opaque pointer whose meaning is not visible in this header. */
 348 void mca_btl_ofi_rcache_init (mca_btl_ofi_module_t *module);
 349 int mca_btl_ofi_reg_mem (void *reg_data, void *base, size_t size,
 350                          mca_rcache_base_registration_t *reg);
 351 int mca_btl_ofi_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg);
 352 
     /* Presumably drives progress on a single context. */
 353 int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context);
 354 
     /* Presumably allocates a module for the given mode (see enum
      * mca_btl_ofi_mode). */
 355 mca_btl_ofi_module_t * mca_btl_ofi_module_alloc (int mode);
 356 
     /* Presumably posts `count` receives on the given context. */
 357 int mca_btl_ofi_post_recvs(mca_btl_base_module_t* module, mca_btl_ofi_context_t *context, int count);
     /* Target of the MCA_BTL_OFI_ABORT() macro; takes no arguments. */
 358 void mca_btl_ofi_exit(void);
 359 
 360 /* thread atomics */
 361 static inline bool mca_btl_ofi_context_trylock (mca_btl_ofi_context_t *context)
 362 {
 363     return (context->lock || OPAL_ATOMIC_SWAP_32(&context->lock, 1));
 364 }
 365 
 366 static inline void mca_btl_ofi_context_lock(mca_btl_ofi_context_t *context)
 367 {
 368     while (mca_btl_ofi_context_trylock(context));
 369 }
 370 
     /**
      * Release the lock of a context.
      *
      * Issues opal_atomic_mb() and then stores 0 into the lock word; the
      * barrier must stay ahead of the store.
      * NOTE(review): opal_atomic_mb() is presumably a full memory barrier, so
      * that writes made while holding the lock become visible before the lock
      * word is cleared -- confirm against the OPAL atomics documentation.
      *
      * @param context (IN)  Context to unlock
      */
 371 static inline void mca_btl_ofi_context_unlock(mca_btl_ofi_context_t *context)
 372 {
 373     opal_atomic_mb();
 374     context->lock = 0;
 375 }
 376 
 377 END_C_DECLS
 378 #endif

/* [<][>][^][v][top][bottom][index][help] */