This source file includes the following definitions.
- mca_btl_ofi_add_procs
- mca_btl_ofi_del_procs
- mca_btl_ofi_rcache_init
- mca_btl_ofi_register_mem
- mca_btl_ofi_deregister_mem
- mca_btl_ofi_reg_mem
- mca_btl_ofi_dereg_mem
- mca_btl_ofi_finalize
- mca_btl_ofi_post_recvs
- mca_btl_ofi_module_alloc
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 #include "opal_config.h"
  26 #include <string.h>
  27 #include "opal/class/opal_bitmap.h"
  28 #include "opal/util/printf.h"
  29 #include "opal/mca/btl/btl.h"
  30 #include "opal/datatype/opal_convertor.h"
  31 #include "opal/mca/mpool/base/base.h"
  32 #include "opal/mca/mpool/mpool.h"
  33 
  34 #include "btl_ofi.h"
  35 #include "btl_ofi_endpoint.h"
  36 #include "btl_ofi_frag.h"
  37 
  38 static int mca_btl_ofi_add_procs (mca_btl_base_module_t *btl,
  39                                   size_t nprocs, opal_proc_t **opal_procs,
  40                                   mca_btl_base_endpoint_t **peers,
  41                                   opal_bitmap_t *reachable)
  42 {
  43     int rc;
  44     int count;
  45     char *ep_name = NULL;
  46     size_t namelen = mca_btl_ofi_component.namelen;
  47 
  48     opal_proc_t *proc;
  49     mca_btl_base_endpoint_t *ep;
  50 
  51     mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
  52 
  53     for (size_t i = 0 ; i < nprocs ; ++i) {
  54 
  55         proc = opal_procs[i];
  56 
  57         
  58         rc = opal_hash_table_get_value_uint64 (&ofi_btl->id_to_endpoint, (intptr_t) proc, (void **) &ep);
  59 
  60         if (OPAL_SUCCESS == rc) {
  61             BTL_VERBOSE(("returning existing endpoint for proc %s", OPAL_NAME_PRINT(proc->proc_name)));
  62             peers[i] = ep;
  63 
  64         } else {
  65             
  66             peers[i] = mca_btl_ofi_endpoint_create (proc, ofi_btl->ofi_endpoint);
  67             BTL_VERBOSE(("creating peer %p", (void*) peers[i]));
  68 
  69             if (OPAL_UNLIKELY(NULL == peers[i])) {
  70                 return OPAL_ERR_OUT_OF_RESOURCE;
  71             }
  72 
  73             
  74             (void) opal_hash_table_set_value_uint64 (&ofi_btl->id_to_endpoint, (intptr_t) proc, (void**) &ep);
  75         }
  76 
  77         OPAL_MODEX_RECV(rc, &mca_btl_ofi_component.super.btl_version,
  78                         &peers[i]->ep_proc->proc_name, (void **)&ep_name, &namelen);
  79         if (OPAL_SUCCESS != rc) {
  80             BTL_ERROR(("error receiving modex"));
  81             MCA_BTL_OFI_ABORT();
  82         }
  83 
  84         
  85         count = fi_av_insert(ofi_btl->av,      
  86                              ep_name,          
  87                              1,                
  88                              &peers[i]->peer_addr, 
  89                              0,                
  90                              NULL);            
  91 
  92         
  93         if (count == 1) { 
  94             opal_list_append (&ofi_btl->endpoints, &peers[i]->super);
  95             opal_bitmap_set_bit(reachable, i);
  96         } else {
  97             BTL_VERBOSE(("fi_av_insert failed with rc = %d", count));
  98             MCA_BTL_OFI_ABORT();
  99         }
 100     }
 101 
 102     return OPAL_SUCCESS;
 103 }
 104 
 105 static int mca_btl_ofi_del_procs (mca_btl_base_module_t *btl, size_t nprocs,
 106                                   opal_proc_t **procs, mca_btl_base_endpoint_t **peers)
 107 {
 108     int rc;
 109     mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
 110     mca_btl_base_endpoint_t *ep;
 111 
 112     for (size_t i = 0 ; i < nprocs ; ++i) {
 113         if (peers[i]) {
 114             rc = opal_hash_table_get_value_uint64 (&ofi_btl->id_to_endpoint, (intptr_t) procs[i], (void **) &ep);
 115 
 116             if (OPAL_SUCCESS == rc) {
 117                 
 118                 rc = fi_av_remove(ofi_btl->av, &peers[i]->peer_addr, 1, 0);
 119                 if (rc < 0) {
 120                     
 121                     
 122                     BTL_ERROR(("fi_av_remove failed with error %d:%s",
 123                                     rc, fi_strerror(-rc)));
 124                 }
 125 
 126                 
 127                 opal_list_remove_item (&ofi_btl->endpoints, &peers[i]->super);
 128                 (void) opal_hash_table_remove_value_uint64 (&ofi_btl->id_to_endpoint, (intptr_t) procs[i]);
 129                 OBJ_RELEASE(peers[i]);
 130            }
 131         }
 132     }
 133 
 134     return OPAL_SUCCESS;
 135 }
 136 
 137 void mca_btl_ofi_rcache_init (mca_btl_ofi_module_t *module)
 138 {
 139     if (!module->initialized) {
 140         mca_rcache_base_resources_t rcache_resources;
 141         char *tmp;
 142 
 143         (void) opal_asprintf (&tmp, "ofi.%s", module->linux_device_name);
 144 
 145         rcache_resources.cache_name     = tmp;
 146         rcache_resources.reg_data       = (void *) module;
 147         rcache_resources.sizeof_reg     = sizeof (mca_btl_ofi_reg_t);
 148         rcache_resources.register_mem   = mca_btl_ofi_reg_mem;
 149         rcache_resources.deregister_mem = mca_btl_ofi_dereg_mem;
 150 
 151         module->rcache = mca_rcache_base_module_create ("grdma", module, &rcache_resources);
 152         free (tmp);
 153 
 154         if (NULL == module->rcache) {
 155             
 156             BTL_ERROR(("cannot create rcache"));
 157             MCA_BTL_OFI_ABORT();
 158         }
 159 
 160         module->initialized = true;
 161     }
 162 }
 163 
 164 
 165 
 166 
 167 
 168 
 169 
 170 
 171 
 172 
 173 
 174 
 175 
 176 
 177 
 178 
 179 
 180 
 181 
 182 
 183 
 184 static struct mca_btl_base_registration_handle_t *
 185 mca_btl_ofi_register_mem (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *base,
 186                           size_t size, uint32_t flags)
 187 {
 188     mca_btl_ofi_module_t *ofi_module = (mca_btl_ofi_module_t *) btl;
 189     mca_btl_ofi_reg_t *reg;
 190     int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
 191     int rc;
 192 
 193     rc = ofi_module->rcache->rcache_register (ofi_module->rcache, base, size, 0, access_flags,
 194                                               (mca_rcache_base_registration_t **) ®);
 195     if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
 196         return NULL;
 197     }
 198 
 199     return ®->handle;
 200 }
 201 
 202 
 203 
 204 
 205 
 206 
 207 
 208 
 209 
 210 
 211 
 212 
 213 static int mca_btl_ofi_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
 214 {
 215     mca_btl_ofi_module_t *ofi_module = (mca_btl_ofi_module_t *) btl;
 216     mca_btl_ofi_reg_t *reg =
 217         (mca_btl_ofi_reg_t *)((intptr_t) handle - offsetof (mca_btl_ofi_reg_t, handle));
 218 
 219     (void) ofi_module->rcache->rcache_deregister (ofi_module->rcache, ®->base);
 220 
 221     return OPAL_SUCCESS;
 222 }
 223 
 224 int mca_btl_ofi_reg_mem (void *reg_data, void *base, size_t size, mca_rcache_base_registration_t *reg)
 225 {
 226     int rc;
 227     static uint64_t access_flags = FI_REMOTE_WRITE | FI_REMOTE_READ | FI_READ | FI_WRITE;
 228 
 229     mca_btl_ofi_module_t *btl = (mca_btl_ofi_module_t*) reg_data;
 230     mca_btl_ofi_reg_t *ur = (mca_btl_ofi_reg_t*) reg;
 231 
 232     rc = fi_mr_reg(btl->domain, base, size, access_flags, 0,
 233                    (uint64_t) reg, 0, &ur->ur_mr, NULL);
 234     if (0 != rc) {
 235         return OPAL_ERR_OUT_OF_RESOURCE;
 236     }
 237 
 238     ur->handle.rkey = fi_mr_key(ur->ur_mr);
 239     ur->handle.desc = fi_mr_desc(ur->ur_mr);
 240 
 241     
 242 
 243 
 244     if (btl->use_virt_addr) {
 245         ur->handle.base_addr = 0;
 246     } else {
 247         ur->handle.base_addr = base;
 248     }
 249 
 250     return OPAL_SUCCESS;
 251 }
 252 
 253 int mca_btl_ofi_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
 254 {
 255     mca_btl_ofi_reg_t *ur = (mca_btl_ofi_reg_t*)reg;
 256 
 257     if (ur->ur_mr != NULL) {
 258         if (0 != fi_close(&ur->ur_mr->fid)) {
 259             BTL_ERROR(("%s: error unpinning memory mr=%p: %s",
 260                        __func__, (void*) ur->ur_mr, strerror(errno)));
 261             return OPAL_ERROR;
 262         }
 263     }
 264 
 265     return OPAL_SUCCESS;
 266 }
 267 
 268 
 269 
 270 
 271 
 272 int mca_btl_ofi_finalize (mca_btl_base_module_t* btl)
 273 {
 274     int i;
 275     mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
 276     mca_btl_ofi_endpoint_t *endpoint, *next;
 277 
 278     assert(btl);
 279 
 280     
 281     for (i=0; i < ofi_btl->num_contexts; i++) {
 282         mca_btl_ofi_context_finalize(&ofi_btl->contexts[i], ofi_btl->is_scalable_ep);
 283     }
 284     free(ofi_btl->contexts);
 285 
 286     if (NULL != ofi_btl->av) {
 287         fi_close(&ofi_btl->av->fid);
 288     }
 289 
 290     if (NULL != ofi_btl->ofi_endpoint) {
 291         fi_close(&ofi_btl->ofi_endpoint->fid);
 292     }
 293 
 294     if (NULL != ofi_btl->domain) {
 295         fi_close(&ofi_btl->domain->fid);
 296     }
 297 
 298     if (NULL != ofi_btl->fabric) {
 299         fi_close(&ofi_btl->fabric->fid);
 300     }
 301 
 302     if (NULL != ofi_btl->fabric_info) {
 303         fi_freeinfo(ofi_btl->fabric_info);
 304     }
 305 
 306     
 307     OPAL_LIST_FOREACH_SAFE(endpoint, next, &ofi_btl->endpoints, mca_btl_ofi_endpoint_t) {
 308         opal_list_remove_item (&ofi_btl->endpoints, &endpoint->super);
 309         OBJ_RELEASE(endpoint);
 310     }
 311 
 312     OBJ_DESTRUCT(&ofi_btl->endpoints);
 313     OBJ_DESTRUCT(&ofi_btl->id_to_endpoint);
 314     OBJ_DESTRUCT(&ofi_btl->module_lock);
 315 
 316     if (ofi_btl->rcache) {
 317         mca_rcache_base_module_destroy (ofi_btl->rcache);
 318         ofi_btl->rcache = NULL;
 319     }
 320 
 321     free (btl);
 322 
 323     return OPAL_SUCCESS;
 324 }
 325 
 326 
 327 int mca_btl_ofi_post_recvs (mca_btl_base_module_t *module,
 328                             mca_btl_ofi_context_t *context,
 329                             int count)
 330 {
 331     int i;
 332     int rc;
 333     mca_btl_ofi_base_frag_t *frag;
 334     mca_btl_ofi_frag_completion_t *comp;
 335 
 336     for (i=0; i < count; i++) {
 337         frag = (mca_btl_ofi_base_frag_t*) mca_btl_ofi_alloc(module,
 338                                                      NULL,
 339                                                      0,
 340                                                      MCA_BTL_OFI_FRAG_SIZE,
 341                                                      MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
 342         if (NULL == frag) {
 343             BTL_ERROR(("cannot allocate recv frag."));
 344             return OPAL_ERROR;
 345         }
 346 
 347         comp = mca_btl_ofi_frag_completion_alloc (module,
 348                                                   context,
 349                                                   frag,
 350                                                   MCA_BTL_OFI_TYPE_RECV);
 351 
 352         rc = fi_recv (context->rx_ctx, &frag->hdr, MCA_BTL_OFI_RECV_SIZE,
 353                       NULL, FI_ADDR_UNSPEC, &comp->comp_ctx);
 354 
 355         if (FI_SUCCESS != rc) {
 356             BTL_ERROR(("cannot post recvs"));
 357             return OPAL_ERROR;
 358         }
 359     }
 360     return OPAL_SUCCESS;
 361 }
 362 
 363 
 364 mca_btl_ofi_module_t * mca_btl_ofi_module_alloc (int mode)
 365 {
 366     mca_btl_ofi_module_t *module;
 367 
 368     
 369     module = (mca_btl_ofi_module_t*) calloc(1, sizeof(mca_btl_ofi_module_t));
 370     if (NULL == module) {
 371         return NULL;
 372     }
 373 
 374     
 375     *module = mca_btl_ofi_module_template;
 376 
 377     if (mode == MCA_BTL_OFI_MODE_ONE_SIDED || mode == MCA_BTL_OFI_MODE_FULL_SUPPORT) {
 378 
 379         module->super.btl_put            = mca_btl_ofi_put;
 380         module->super.btl_get            = mca_btl_ofi_get;
 381         module->super.btl_atomic_op      = mca_btl_ofi_aop;
 382         module->super.btl_atomic_fop     = mca_btl_ofi_afop;
 383         module->super.btl_atomic_cswap   = mca_btl_ofi_acswap;
 384         module->super.btl_flush          = mca_btl_ofi_flush;
 385 
 386         module->super.btl_register_mem   = mca_btl_ofi_register_mem;
 387         module->super.btl_deregister_mem = mca_btl_ofi_deregister_mem;
 388 
 389         module->super.btl_flags         |= MCA_BTL_FLAGS_ATOMIC_FOPS |
 390                                            MCA_BTL_FLAGS_ATOMIC_OPS |
 391                                            MCA_BTL_FLAGS_RDMA;
 392 
 393         module->super.btl_atomic_flags   = MCA_BTL_ATOMIC_SUPPORTS_ADD |
 394                                            MCA_BTL_ATOMIC_SUPPORTS_SWAP |
 395                                            MCA_BTL_ATOMIC_SUPPORTS_CSWAP |
 396                                            MCA_BTL_ATOMIC_SUPPORTS_32BIT ;
 397 
 398         module->super.btl_put_limit = 1 << 23;
 399         module->super.btl_put_alignment = 0;
 400 
 401         module->super.btl_get_limit = 1 << 23;
 402         module->super.btl_get_alignment = 0;
 403 
 404         module->super.btl_registration_handle_size =
 405                                 sizeof(mca_btl_base_registration_handle_t);
 406     }
 407 
 408     if (mode == MCA_BTL_OFI_MODE_TWO_SIDED || mode == MCA_BTL_OFI_MODE_FULL_SUPPORT) {
 409 
 410         module->super.btl_alloc          = mca_btl_ofi_alloc;
 411         module->super.btl_free           = mca_btl_ofi_free;
 412         module->super.btl_prepare_src    = mca_btl_ofi_prepare_src;
 413 
 414         module->super.btl_send           = mca_btl_ofi_send;
 415 
 416         module->super.btl_flags         |= MCA_BTL_FLAGS_SEND;
 417         module->super.btl_eager_limit    = MCA_BTL_OFI_FRAG_SIZE;
 418         module->super.btl_max_send_size  = MCA_BTL_OFI_FRAG_SIZE;
 419         module->super.btl_rndv_eager_limit = MCA_BTL_OFI_FRAG_SIZE;
 420 
 421         
 422 
 423         module->super.btl_exclusivity    = MCA_BTL_EXCLUSIVITY_HIGH;
 424     }
 425 
 426     if (mode == MCA_BTL_OFI_MODE_FULL_SUPPORT) {
 427         module->super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024;
 428         module->super.btl_rdma_pipeline_send_length = 8 * 1024;
 429     }
 430 
 431     return module;
 432 }
 433 
 434 mca_btl_ofi_module_t mca_btl_ofi_module_template = {
 435     .super = {
 436         .btl_component      = &mca_btl_ofi_component.super,
 437         .btl_add_procs      = mca_btl_ofi_add_procs,
 438         .btl_del_procs      = mca_btl_ofi_del_procs,
 439         .btl_finalize       = mca_btl_ofi_finalize,
 440    }
 441 };