root/opal/mca/btl/uct/btl_uct_module.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_btl_uct_get_ep
  2. mca_btl_uct_add_procs
  3. mca_btl_uct_del_procs
  4. mca_btl_uct_register_mem
  5. mca_btl_uct_deregister_mem
  6. mca_btl_uct_reg_mem
  7. mca_btl_uct_dereg_mem
  8. mca_btl_uct_finalize
  9. mca_btl_uct_md_construct
  10. mca_btl_uct_md_destruct

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2013 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
  14  *                         reserved.
  15  * $COPYRIGHT$
  16  *
  17  * Additional copyrights may follow
  18  *
  19  * $HEADER$
  20  */
  21 
  22 #include "opal_config.h"
  23 #include <string.h>
  24 #include "opal/class/opal_bitmap.h"
  25 #include "opal/mca/btl/btl.h"
  26 #include "opal/datatype/opal_convertor.h"
  27 #include "opal/mca/mpool/base/base.h"
  28 #include "opal/mca/mpool/mpool.h"
  29 
  30 #include "btl_uct.h"
  31 #include "btl_uct_endpoint.h"
  32 #include "btl_uct_am.h"
  33 
  34 struct mca_btl_base_endpoint_t *mca_btl_uct_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc)
  35 {
  36     mca_btl_uct_module_t *uct_module = (mca_btl_uct_module_t *) module;
  37     mca_btl_base_endpoint_t *ep;
  38     int rc;
  39 
  40     opal_mutex_lock (&uct_module->endpoint_lock);
  41 
  42     do {
  43         rc = opal_hash_table_get_value_uint64 (&uct_module->id_to_endpoint, (intptr_t) proc, (void **) &ep);
  44         if (OPAL_SUCCESS == rc) {
  45             BTL_VERBOSE(("returning existing endpoint for proc %s", OPAL_NAME_PRINT(proc->proc_name)));
  46             break;
  47         }
  48 
  49         /*  Create and Init endpoints */
  50         ep = mca_btl_uct_endpoint_create (proc);
  51         if (OPAL_UNLIKELY(NULL == ep)) {
  52             BTL_ERROR(("btl/uct error initializing endpoint"));
  53             break;
  54         }
  55 
  56         BTL_VERBOSE(("endpoint initialized. new endpoint: %p", (void *) ep));
  57 
  58         /* add this endpoint to the connection lookup table */
  59         (void) opal_hash_table_set_value_uint64 (&uct_module->id_to_endpoint, (intptr_t) proc, ep);
  60     } while (0);
  61 
  62     opal_mutex_unlock (&uct_module->endpoint_lock);
  63 
  64     return ep;
  65 }
  66 
  67 static int mca_btl_uct_add_procs (mca_btl_base_module_t *btl,
  68                                   size_t nprocs, opal_proc_t **opal_procs,
  69                                   mca_btl_base_endpoint_t **peers,
  70                                   opal_bitmap_t *reachable)
  71 {
  72     mca_btl_uct_module_t *uct_module = (mca_btl_uct_module_t *) btl;
  73     int rc;
  74 
  75     if (false == uct_module->initialized) {
  76         mca_btl_uct_tl_t *am_tl = uct_module->am_tl;
  77 
  78         /* NTH: might want to vary this size based off the universe size (if
  79          * one exists). the table is only used for connection lookup and
  80          * endpoint removal. */
  81         rc = opal_hash_table_init (&uct_module->id_to_endpoint, 512);
  82         if (OPAL_SUCCESS != rc) {
  83             BTL_ERROR(("error initializing the endpoint hash. rc = %d", rc));
  84             return rc;
  85         }
  86 
  87         if (am_tl) {
  88             rc = opal_free_list_init (&uct_module->short_frags, sizeof (mca_btl_uct_base_frag_t),
  89                                       opal_cache_line_size, OBJ_CLASS(mca_btl_uct_base_frag_t),
  90                                       MCA_BTL_UCT_TL_ATTR(am_tl, 0).cap.am.max_short, opal_cache_line_size,
  91                                       0, 1024, 64, NULL, 0, NULL, NULL, NULL);
  92 
  93             rc = opal_free_list_init (&uct_module->eager_frags, sizeof (mca_btl_uct_base_frag_t),
  94                                       opal_cache_line_size, OBJ_CLASS(mca_btl_uct_base_frag_t),
  95                                       btl->btl_eager_limit, opal_cache_line_size,
  96                                       0, 1024, 64, NULL, 0, uct_module->rcache, NULL, NULL);
  97 
  98             rc = opal_free_list_init (&uct_module->max_frags, sizeof (mca_btl_uct_base_frag_t),
  99                                       opal_cache_line_size, OBJ_CLASS(mca_btl_uct_base_frag_t),
 100                                       btl->btl_max_send_size, opal_cache_line_size, 0, 128, 8,
 101                                       NULL, 0, uct_module->rcache, NULL, NULL);
 102         }
 103 
 104         uct_module->initialized = true;
 105     }
 106 
 107     for (size_t i = 0 ; i < nprocs ; ++i) {
 108         /* all endpoints are reachable for uct */
 109         peers[i] = mca_btl_uct_get_ep (btl, opal_procs[i]);
 110         if (OPAL_UNLIKELY(NULL == peers[i])) {
 111             return OPAL_ERR_OUT_OF_RESOURCE;
 112         }
 113 
 114         opal_bitmap_set_bit(reachable, i);
 115     }
 116 
 117     return OPAL_SUCCESS;
 118 }
 119 
 120 static int mca_btl_uct_del_procs (mca_btl_base_module_t *btl, size_t nprocs,
 121                                   opal_proc_t **procs, mca_btl_base_endpoint_t **peers)
 122 {
 123     mca_btl_uct_module_t *uct_module = (mca_btl_uct_module_t *) btl;
 124     mca_btl_base_endpoint_t *ep;
 125     int rc;
 126 
 127     for (size_t i = 0 ; i < nprocs ; ++i) {
 128         if (NULL == procs[i]) {
 129             continue;
 130         }
 131 
 132         rc = opal_hash_table_get_value_uint64 (&uct_module->id_to_endpoint, (intptr_t) procs[i], (void **) &ep);
 133         if (OPAL_SUCCESS != rc) {
 134             continue;
 135         }
 136 
 137         (void) opal_hash_table_remove_value_uint64 (&uct_module->id_to_endpoint, (intptr_t) procs[i]);
 138         OBJ_RELEASE(ep);
 139     }
 140 
 141     return OPAL_SUCCESS;
 142 }
 143 
 144 
 145 /**
 146  * @brief Register a memory region for put/get/atomic operations.
 147  *
 148  * @param btl (IN)         BTL module
 149  * @param endpoint(IN)     BTL addressing information (or NULL for all endpoints)
 150  * @param base (IN)        Pointer to start of region
 151  * @param size (IN)        Size of region
 152  * @param flags (IN)       Flags indicating what operation will be performed. Valid
 153  *                         values are MCA_BTL_DES_FLAGS_PUT, MCA_BTL_DES_FLAGS_GET,
 154  *                         and MCA_BTL_DES_FLAGS_ATOMIC
 155  *
 156  * @returns a memory registration handle valid for both local and remote operations
 157  * @returns NULL if the region could not be registered
 158  *
 159  * This function registers the specified region with the hardware for use with
 160  * the btl_put, btl_get, btl_atomic_cas, btl_atomic_op, and btl_atomic_fop
 161  * functions. Care should be taken to not hold an excessive number of registrations
 162  * as they may use limited system/NIC resources.
 163  */
 164 static struct mca_btl_base_registration_handle_t *
 165 mca_btl_uct_register_mem (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *base,
 166                           size_t size, uint32_t flags)
 167 {
 168     mca_btl_uct_module_t *uct_module = (mca_btl_uct_module_t *) btl;
 169     mca_btl_uct_reg_t *reg;
 170     int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
 171     int rc;
 172 
 173     rc = uct_module->rcache->rcache_register (uct_module->rcache, base, size, 0, access_flags,
 174                                               (mca_rcache_base_registration_t **) &reg);
 175     if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
 176         return NULL;
 177     }
 178 
 179     return &reg->handle;
 180 }
 181 
 182 /**
 183  * @brief Deregister a memory region
 184  *
 185  * @param btl (IN)         BTL module region was registered with
 186  * @param handle (IN)      BTL registration handle to deregister
 187  *
 188  * This function deregisters the memory region associated with the specified handle. Care
 189  * should be taken to not perform any RDMA or atomic operation on this memory region
 190  * after it is deregistered. It is erroneous to specify a memory handle associated with
 191  * a remote node.
 192  */
 193 static int mca_btl_uct_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
 194 {
 195     mca_btl_uct_module_t *uct_module = (mca_btl_uct_module_t *) btl;
 196     mca_btl_uct_reg_t *reg =
 197         (mca_btl_uct_reg_t *)((intptr_t) handle - offsetof (mca_btl_uct_reg_t, handle));
 198 
 199     (void) uct_module->rcache->rcache_deregister (uct_module->rcache, &reg->base);
 200 
 201     return OPAL_SUCCESS;
 202 }
 203 
 204 int mca_btl_uct_reg_mem (void *reg_data, void *base, size_t size, mca_rcache_base_registration_t *reg)
 205 {
 206     mca_btl_uct_module_t *uct_module = (mca_btl_uct_module_t *) reg_data;
 207     mca_btl_uct_reg_t *uct_reg = (mca_btl_uct_reg_t *) reg;
 208     ucs_status_t ucs_status;
 209     int uct_flags = 0;
 210 
 211     BTL_VERBOSE(("attempting to register range {%p,%p} with uct", base, (char *) base + size));
 212 
 213     if (MCA_BTL_REG_FLAG_REMOTE_READ & reg->access_flags) {
 214         uct_flags |= UCT_MD_MEM_ACCESS_REMOTE_GET;
 215     }
 216     if (MCA_BTL_REG_FLAG_REMOTE_WRITE & reg->access_flags) {
 217         uct_flags |= UCT_MD_MEM_ACCESS_REMOTE_PUT;
 218     }
 219     if (MCA_BTL_REG_FLAG_REMOTE_ATOMIC & reg->access_flags) {
 220         uct_flags |= UCT_MD_MEM_ACCESS_REMOTE_ATOMIC;
 221     }
 222 
 223     /* UCT barfs if there are no access flags */
 224     if (0 == uct_flags) {
 225         uct_flags = UCT_MD_MEM_ACCESS_ALL;
 226     }
 227 
 228     ucs_status = uct_md_mem_reg (uct_module->md->uct_md, base, size, uct_flags, &uct_reg->uct_memh);
 229     if (UCS_OK != ucs_status) {
 230         BTL_VERBOSE(("Error registering memory with UCT. code: %d", ucs_status));
 231         return OPAL_ERR_OUT_OF_RESOURCE;
 232     }
 233 
 234     if (reg->access_flags & (MCA_BTL_REG_FLAG_REMOTE_READ | MCA_BTL_REG_FLAG_REMOTE_WRITE | MCA_BTL_REG_FLAG_REMOTE_ATOMIC)) {
 235         /* requested registration may be used by a remote process so go ahead and pack
 236          * the registration handle */
 237         ucs_status = uct_md_mkey_pack (uct_module->md->uct_md, uct_reg->uct_memh, uct_reg->handle.packed_handle);
 238         if (OPAL_UNLIKELY(UCS_OK != ucs_status)) {
 239             BTL_VERBOSE(("Could not pack remote key. code: %d", ucs_status));
 240             uct_md_mem_dereg (uct_module->md->uct_md, uct_reg->uct_memh);
 241             return OPAL_ERR_OUT_OF_RESOURCE;
 242         }
 243     }
 244 
 245     return OPAL_SUCCESS;
 246 }
 247 
 248 int mca_btl_uct_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
 249 {
 250     mca_btl_uct_module_t *uct_module = (mca_btl_uct_module_t *) reg_data;
 251     mca_btl_uct_reg_t *uct_reg = (mca_btl_uct_reg_t *) reg;
 252 
 253     uct_md_mem_dereg (uct_module->md->uct_md, uct_reg->uct_memh);
 254 
 255     return OPAL_SUCCESS;
 256 }
 257 
 258 
 259 /*
 260  * Cleanup/release module resources.
 261  */
 262 
 263 int mca_btl_uct_finalize (mca_btl_base_module_t* btl)
 264 {
 265     mca_btl_uct_module_t *uct_module = (mca_btl_uct_module_t *) btl;
 266     mca_btl_uct_endpoint_t *endpoint;
 267     uint64_t key;
 268 
 269     /* clean up any leftover endpoints */
 270     OPAL_HASH_TABLE_FOREACH(key, uint64, endpoint, &uct_module->id_to_endpoint) {
 271         OBJ_RELEASE(endpoint);
 272     }
 273     OBJ_DESTRUCT(&uct_module->id_to_endpoint);
 274     OBJ_DESTRUCT(&uct_module->short_frags);
 275     OBJ_DESTRUCT(&uct_module->eager_frags);
 276     OBJ_DESTRUCT(&uct_module->max_frags);
 277     OBJ_DESTRUCT(&uct_module->pending_frags);
 278     OBJ_DESTRUCT(&uct_module->lock);
 279     OBJ_DESTRUCT(&uct_module->pending_connection_reqs);
 280 
 281     if (uct_module->rcache) {
 282         mca_rcache_base_module_destroy (uct_module->rcache);
 283     }
 284 
 285     if (NULL != uct_module->am_tl) {
 286         OBJ_RELEASE(uct_module->am_tl);
 287     }
 288 
 289     if (NULL != uct_module->conn_tl) {
 290         OBJ_RELEASE(uct_module->conn_tl);
 291     }
 292 
 293     if (NULL != uct_module->rdma_tl) {
 294         OBJ_RELEASE(uct_module->rdma_tl);
 295     }
 296 
 297     ucs_async_context_destroy (uct_module->ucs_async);
 298 
 299     OBJ_DESTRUCT(&uct_module->endpoint_lock);
 300 
 301     free (uct_module->md_name);
 302     free (uct_module);
 303 
 304     return OPAL_SUCCESS;
 305 }
 306 
 307 mca_btl_uct_module_t mca_btl_uct_module_template = {
 308     .super = {
 309         /* initialize functions. this btl only support RDMA and atomics
 310          * for now so it does not provide prepare_src, alloc, free, or send */
 311         .btl_component      = &mca_btl_uct_component.super,
 312         .btl_add_procs      = mca_btl_uct_add_procs,
 313         .btl_del_procs      = mca_btl_uct_del_procs,
 314         .btl_finalize       = mca_btl_uct_finalize,
 315         .btl_put            = mca_btl_uct_put,
 316         .btl_get            = mca_btl_uct_get,
 317         .btl_register_mem   = mca_btl_uct_register_mem,
 318         .btl_deregister_mem = mca_btl_uct_deregister_mem,
 319         .btl_atomic_op      = mca_btl_uct_aop,
 320         .btl_atomic_fop     = mca_btl_uct_afop,
 321         .btl_atomic_cswap   = mca_btl_uct_acswap,
 322         .btl_flush          = mca_btl_uct_flush,
 323 
 324         .btl_sendi          = mca_btl_uct_sendi,
 325         .btl_prepare_src    = mca_btl_uct_prepare_src,
 326         .btl_send           = mca_btl_uct_send,
 327         .btl_alloc          = mca_btl_uct_alloc,
 328         .btl_free           = mca_btl_uct_free,
 329 
 330         /* set the default flags for this btl. uct provides us with rdma and both
 331          * fetching and non-fetching atomics (though limited to add and cswap) */
 332         .btl_flags          = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS | MCA_BTL_FLAGS_ATOMIC_OPS,
 333         .btl_atomic_flags   = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_CSWAP |
 334                               MCA_BTL_ATOMIC_SUPPORTS_SWAP | MCA_BTL_ATOMIC_SUPPORTS_32BIT,
 335 
 336         /* set the default limits on put and get */
 337         .btl_put_limit      = 1 << 23,
 338         .btl_put_alignment  = 0,
 339         .btl_get_limit      = 1 << 23,
 340         .btl_get_alignment  = 0,
 341 
 342         .btl_rndv_eager_limit = 8192,
 343         .btl_rdma_pipeline_frag_size = 4 * 1024 * 1024,
 344         .btl_rdma_pipeline_send_length = 8192,
 345         .btl_eager_limit    = 8192,
 346         .btl_max_send_size  = 65536,
 347    }
 348 };
 349 
 350 OBJ_CLASS_INSTANCE(mca_btl_uct_reg_t, opal_free_list_item_t, NULL, NULL);
 351 
 352 static void mca_btl_uct_md_construct (mca_btl_uct_md_t *md)
 353 {
 354     md->uct_md = NULL;
 355 }
 356 
 357 static void mca_btl_uct_md_destruct (mca_btl_uct_md_t *md)
 358 {
 359     if (md->uct_md) {
 360         uct_md_close (md->uct_md);
 361         md->uct_md = NULL;
 362     }
 363 }
 364 
 365 OBJ_CLASS_INSTANCE(mca_btl_uct_md_t, opal_object_t, mca_btl_uct_md_construct, mca_btl_uct_md_destruct);

/* [<][>][^][v][top][bottom][index][help] */