root/opal/mca/btl/ofi/btl_ofi_component.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. validate_info
  2. mca_btl_ofi_component_register
  3. mca_btl_ofi_component_open
  4. mca_btl_ofi_component_close
  5. mca_btl_ofi_exit
  6. mca_btl_ofi_component_init
  7. mca_btl_ofi_init_device
  8. mca_btl_ofi_component_progress

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2005 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
  14  *                         reserved.
  15  * Copyright (c) 2018      Intel, Inc, All rights reserved
  16  *
  17  * Copyright (c) 2018      Amazon.com, Inc. or its affiliates.  All Rights reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  */
  24 
  25 
  26 #include "opal_config.h"
  27 
  28 #include "opal/util/printf.h"
  29 
  30 #include "opal/mca/btl/btl.h"
  31 #include "opal/mca/btl/base/base.h"
  32 #include "opal/mca/hwloc/base/base.h"
  33 
  34 #include <string.h>
  35 
  36 #include "btl_ofi.h"
  37 #include "btl_ofi_endpoint.h"
  38 #include "btl_ofi_rdma.h"
  39 #include "btl_ofi_frag.h"
  40 
  41 #define MCA_BTL_OFI_ONE_SIDED_REQUIRED_CAPS       (FI_RMA | FI_ATOMIC)
  42 #define MCA_BTL_OFI_TWO_SIDED_REQUIRED_CAPS       (FI_MSG)
  43 
  44 #define MCA_BTL_OFI_REQUESTED_MR_MODE   (FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_VIRT_ADDR)
  45 
  46 static char *prov_include;
  47 static char *ofi_progress_mode;
  48 static bool disable_sep;
  49 static int mca_btl_ofi_init_device(struct fi_info *info);
  50 
  51 /* validate information returned from fi_getinfo().
  52  * return OPAL_ERROR if we dont have what we need. */
  53 static int validate_info(struct fi_info *info, uint64_t required_caps)
  54 {
  55     int mr_mode;
  56 
  57     BTL_VERBOSE(("validating device: %s", info->domain_attr->name));
  58 
  59     /* we need exactly all the required bits */
  60     if ((info->caps & required_caps) != required_caps) {
  61         BTL_VERBOSE(("unsupported caps"));
  62         return OPAL_ERROR;
  63     }
  64 
  65     /* we need FI_EP_RDM */
  66     if (info->ep_attr->type != FI_EP_RDM) {
  67         BTL_VERBOSE(("unsupported EP type"));
  68         return OPAL_ERROR;
  69     }
  70 
  71     mr_mode = info->domain_attr->mr_mode;
  72 
  73     if (!(mr_mode == FI_MR_BASIC || mr_mode == FI_MR_SCALABLE ||
  74          (mr_mode & ~(FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY)) == 0)) {
  75         BTL_VERBOSE(("unsupported MR mode"));
  76         return OPAL_ERROR;
  77     }
  78 
  79     if (!(info->tx_attr->op_flags | FI_DELIVERY_COMPLETE)) {
  80         BTL_VERBOSE(("the endpoint tx_ctx does not support FI_DELIVERY_COMPLETE"));
  81         return OPAL_ERROR;
  82     }
  83 
  84     BTL_VERBOSE(("device: %s is good to go.", info->domain_attr->name));
  85     return OPAL_SUCCESS;
  86 }
  87 
  88 /* Register the MCA parameters */
  89 static int mca_btl_ofi_component_register(void)
  90 {
  91     char *msg;
  92     mca_btl_ofi_module_t *module = &mca_btl_ofi_module_template;
  93 
  94     opal_asprintf(&msg, "BTL OFI mode of operation. Valid values are: %d = One-Sided only, %d=Two-Sided only, "
  95                    "%d = Both one and two sided. BTL OFI is only optimized for one-sided communication",
  96                    MCA_BTL_OFI_MODE_ONE_SIDED,
  97                    MCA_BTL_OFI_MODE_TWO_SIDED,
  98                    MCA_BTL_OFI_MODE_FULL_SUPPORT);
  99     if (NULL == msg) {
 100         return OPAL_ERR_OUT_OF_RESOURCE;
 101     }
 102 
 103     mca_btl_ofi_component.mode = MCA_BTL_OFI_MODE_ONE_SIDED;
 104     (void)mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
 105                                           "mode",
 106                                           msg,
 107                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 108                                           OPAL_INFO_LVL_5,
 109                                           MCA_BASE_VAR_SCOPE_READONLY,
 110                                           &mca_btl_ofi_component.mode);
 111 
 112     /* fi_getinfo with prov_name == NULL means ALL provider.
 113      * Since now we are using the first valid info returned, I'm not sure
 114      * if we need to provide the support for comma limited provider list. */
 115     prov_include = NULL;
 116     (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
 117                                           "provider_include",
 118                                           "OFI provider that ofi btl will query for. This parameter only "
 119                                           "accept ONE provider name. "
 120                                           "(e.g., \"psm2\"; an empty value means that all providers will "
 121                                           "be considered.",
 122                                           MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 123                                           OPAL_INFO_LVL_4,
 124                                           MCA_BASE_VAR_SCOPE_READONLY,
 125                                           &prov_include);
 126 
 127     mca_btl_ofi_component.num_cqe_read = MCA_BTL_OFI_NUM_CQE_READ;
 128     (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
 129                                           "num_cq_read",
 130                                           "Number of completion entries to read from a single cq_read. ",
 131                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 132                                           OPAL_INFO_LVL_5,
 133                                           MCA_BASE_VAR_SCOPE_READONLY,
 134                                           &mca_btl_ofi_component.num_cqe_read);
 135 
 136     ofi_progress_mode = "unspec";
 137     (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
 138                                           "progress_mode",
 139                                           "requested provider progress mode. [unspec, auto, manual]"
 140                                           "(default: unspec)",
 141                                           MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 142                                           OPAL_INFO_LVL_5,
 143                                           MCA_BASE_VAR_SCOPE_READONLY,
 144                                           &ofi_progress_mode);
 145 
 146     mca_btl_ofi_component.num_contexts_per_module = 1;
 147     (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
 148                                           "num_contexts_per_module",
 149                                           "number of communication context per module to create. "
 150                                           "This should increase multithreaded performance but it is "
 151                                           "advised that this number should be lower than total cores.",
 152                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 153                                           OPAL_INFO_LVL_5,
 154                                           MCA_BASE_VAR_SCOPE_READONLY,
 155                                           &mca_btl_ofi_component.num_contexts_per_module);
 156 
 157     disable_sep = false;
 158     (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
 159                                           "disable_sep",
 160                                           "force btl/ofi to never use scalable endpoint.",
 161                                           MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 162                                           OPAL_INFO_LVL_5,
 163                                           MCA_BASE_VAR_SCOPE_READONLY,
 164                                           &disable_sep);
 165 
 166     mca_btl_ofi_component.progress_threshold = MCA_BTL_OFI_DEFAULT_PROGRESS_THRESHOLD;
 167     (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
 168                                           "progress_threshold",
 169                                           "number of outstanding operation before btl will progress "
 170                                           "automatically. Tuning this might improve performance on "
 171                                           "certain type of application.",
 172                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 173                                           OPAL_INFO_LVL_5,
 174                                           MCA_BASE_VAR_SCOPE_READONLY,
 175                                           &mca_btl_ofi_component.progress_threshold);
 176 
 177     mca_btl_ofi_component.rd_num = MCA_BTL_OFI_DEFAULT_RD_NUM;
 178     (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
 179                                           "rd_num",
 180                                           "Number of receive descriptor posted per context.",
 181                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 182                                           OPAL_INFO_LVL_5,
 183                                           MCA_BASE_VAR_SCOPE_READONLY,
 184                                           &mca_btl_ofi_component.rd_num);
 185 
 186 
 187     /* for now we want this component to lose to the MTL. */
 188     module->super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 50;
 189 
 190     return mca_btl_base_param_register (&mca_btl_ofi_component.super.btl_version,
 191                                         &module->super);
 192 }
 193 
 194 static int mca_btl_ofi_component_open(void)
 195 {
 196     mca_btl_ofi_component.module_count = 0;
 197     return OPAL_SUCCESS;
 198 }
 199 
 200 /*
 201  * component cleanup - sanity checking of queue lengths
 202  */
 203 static int mca_btl_ofi_component_close(void)
 204 {
 205     /* If we don't sleep, sockets provider freaks out. */
 206     sleep(1);
 207     return OPAL_SUCCESS;
 208 }
 209 
 210 void mca_btl_ofi_exit(void)
 211 {
 212     BTL_ERROR(("BTL OFI will now abort."));
 213     exit(1);
 214 }
 215 
 216 /*
 217  *  OFI component initialization:
 218  *   read interface list from kernel and compare against component parameters
 219  *   then create a BTL instance for selected interfaces
 220  */
 221 
 222 static mca_btl_base_module_t **mca_btl_ofi_component_init (int *num_btl_modules, bool enable_progress_threads,
 223                                                            bool enable_mpi_threads)
 224 {
 225     /* for this BTL to be useful the interface needs to support RDMA and certain atomic operations */
 226     int rc;
 227     uint64_t progress_mode;
 228     unsigned resource_count = 0;
 229     struct mca_btl_base_module_t **base_modules;
 230 
 231     BTL_VERBOSE(("initializing ofi btl"));
 232 
 233     /* Set up libfabric hints. */
 234     uint32_t libfabric_api;
 235     libfabric_api = fi_version();
 236 
 237     /* bail if OFI version is less than 1.5. */
 238     if (libfabric_api < FI_VERSION(1, 5)) {
 239         BTL_VERBOSE(("ofi btl disqualified because OFI version < 1.5."));
 240         return NULL;
 241     }
 242 
 243     struct fi_info *info, *info_list;
 244     struct fi_info hints = {0};
 245     struct fi_ep_attr ep_attr = {0};
 246     struct fi_rx_attr rx_attr = {0};
 247     struct fi_tx_attr tx_attr = {0};
 248     struct fi_fabric_attr fabric_attr = {0};
 249     struct fi_domain_attr domain_attr = {0};
 250     uint64_t required_caps;
 251 
 252     switch (mca_btl_ofi_component.mode) {
 253 
 254         case MCA_BTL_OFI_MODE_TWO_SIDED:
 255             mca_btl_ofi_component.two_sided_enabled = true;
 256             required_caps = MCA_BTL_OFI_TWO_SIDED_REQUIRED_CAPS;
 257             break;
 258 
 259         case MCA_BTL_OFI_MODE_FULL_SUPPORT:
 260             mca_btl_ofi_component.two_sided_enabled = true;
 261             required_caps = MCA_BTL_OFI_ONE_SIDED_REQUIRED_CAPS |
 262                             MCA_BTL_OFI_TWO_SIDED_REQUIRED_CAPS;
 263             break;
 264 
 265         default:
 266             /* default to only one sided. */
 267             required_caps = MCA_BTL_OFI_ONE_SIDED_REQUIRED_CAPS;
 268             break;
 269     }
 270 
 271     /* Select the provider */
 272     fabric_attr.prov_name = prov_include;
 273 
 274     domain_attr.mr_mode = MCA_BTL_OFI_REQUESTED_MR_MODE;
 275 
 276     /* message progression mode. */
 277     if (!strcmp(ofi_progress_mode, "auto")) {
 278         progress_mode = FI_PROGRESS_AUTO;
 279     } else if (!strcmp(ofi_progress_mode, "manual")) {
 280         progress_mode = FI_PROGRESS_MANUAL;
 281     } else {
 282         progress_mode = FI_PROGRESS_UNSPEC;
 283     }
 284 
 285     domain_attr.control_progress = progress_mode;
 286     domain_attr.data_progress = progress_mode;
 287 
 288     /* select endpoint type */
 289     ep_attr.type = FI_EP_RDM;
 290 
 291     /* ask for capabilities */
 292     /* TODO: catch the caps here. */
 293     hints.caps = required_caps;
 294     hints.mode = FI_CONTEXT;
 295 
 296     /* Ask for completion context */
 297     hints.mode = FI_CONTEXT;
 298 
 299     hints.fabric_attr = &fabric_attr;
 300     hints.domain_attr = &domain_attr;
 301     hints.ep_attr = &ep_attr;
 302     hints.tx_attr = &tx_attr;
 303     hints.rx_attr = &rx_attr;
 304 
 305     /* for now */
 306     tx_attr.iov_limit = 1;
 307     rx_attr.iov_limit = 1;
 308 
 309     tx_attr.op_flags = FI_DELIVERY_COMPLETE;
 310 
 311     mca_btl_ofi_component.module_count = 0;
 312 
 313     /* do the query. */
 314     rc = fi_getinfo(FI_VERSION(1, 5), NULL, NULL, 0, &hints, &info_list);
 315     if (0 != rc) {
 316         BTL_VERBOSE(("fi_getinfo failed with code %d: %s",rc, fi_strerror(-rc)));
 317         return NULL;
 318     }
 319 
 320     /* count the number of resources/ */
 321     info = info_list;
 322     while(info) {
 323         resource_count++;
 324         info = info->next;
 325     }
 326     BTL_VERBOSE(("ofi btl found %d possible resources.", resource_count));
 327 
 328     info = info_list;
 329 
 330     while(info) {
 331         rc = validate_info(info, required_caps);
 332         if (OPAL_SUCCESS == rc) {
 333             /* Device passed sanity check, let's make a module.
 334              * We only pick the first device we found valid */
 335             rc = mca_btl_ofi_init_device(info);
 336             if (OPAL_SUCCESS == rc)
 337                 break;
 338         }
 339         info = info->next;
 340     }
 341 
 342     /* We are done with the returned info. */
 343     fi_freeinfo(info_list);
 344 
 345     /* pass module array back to caller */
 346     base_modules = calloc (mca_btl_ofi_component.module_count, sizeof (*base_modules));
 347     if (NULL == base_modules) {
 348         return NULL;
 349     }
 350 
 351     memcpy(base_modules, mca_btl_ofi_component.modules,
 352            mca_btl_ofi_component.module_count *sizeof (mca_btl_ofi_component.modules[0]));
 353 
 354     BTL_VERBOSE(("ofi btl initialization complete. found %d suitable transports",
 355                  mca_btl_ofi_component.module_count));
 356 
 357     *num_btl_modules = mca_btl_ofi_component.module_count;
 358 
 359     return base_modules;
 360 }
 361 
 362 static int mca_btl_ofi_init_device(struct fi_info *info)
 363 {
 364     int rc;
 365     int *module_count = &mca_btl_ofi_component.module_count;
 366     size_t namelen;
 367     size_t num_contexts_to_create;
 368 
 369     char *linux_device_name;
 370     char ep_name[FI_NAME_MAX];
 371 
 372     struct fi_info *ofi_info;
 373     struct fi_ep_attr *ep_attr;
 374     struct fi_domain_attr *domain_attr;
 375     struct fi_av_attr av_attr = {0};
 376     struct fid_fabric *fabric = NULL;
 377     struct fid_domain *domain = NULL;
 378     struct fid_ep *ep = NULL;
 379     struct fid_av *av = NULL;
 380 
 381     mca_btl_ofi_module_t *module;
 382 
 383     module = mca_btl_ofi_module_alloc(mca_btl_ofi_component.mode);
 384     if (NULL == module) {
 385         BTL_VERBOSE(("failed allocating ofi module"));
 386         goto fail;
 387     }
 388 
 389     /* If the user ask for two sided support, something bad is happening
 390      * to the MTL, so we will take maximum priority to supersede the MTL. */
 391     module->super.btl_exclusivity    = MCA_BTL_EXCLUSIVITY_DEFAULT;
 392 
 393     /* make a copy of the given info to store on the module */
 394     ofi_info = fi_dupinfo(info);
 395     ep_attr = ofi_info->ep_attr;
 396     domain_attr = ofi_info->domain_attr;
 397 
 398     linux_device_name = info->domain_attr->name;
 399     BTL_VERBOSE(("initializing dev:%s provider:%s",
 400                     linux_device_name,
 401                     info->fabric_attr->prov_name));
 402 
 403     /* fabric */
 404     rc = fi_fabric(ofi_info->fabric_attr, &fabric, NULL);
 405     if (0 != rc) {
 406         BTL_VERBOSE(("%s failed fi_fabric with err=%s",
 407                         linux_device_name,
 408                         fi_strerror(-rc)
 409                         ));
 410         goto fail;
 411     }
 412 
 413     /* domain */
 414     rc = fi_domain(fabric, ofi_info, &domain, NULL);
 415     if (0 != rc) {
 416         BTL_VERBOSE(("%s failed fi_domain with err=%s",
 417                         linux_device_name,
 418                         fi_strerror(-rc)
 419                         ));
 420         goto fail;
 421     }
 422 
 423     /* AV */
 424     av_attr.type = FI_AV_MAP;
 425     rc = fi_av_open(domain, &av_attr, &av, NULL);
 426     if (0 != rc) {
 427         BTL_VERBOSE(("%s failed fi_av_open with err=%s",
 428                         linux_device_name,
 429                         fi_strerror(-rc)
 430                         ));
 431         goto fail;
 432     }
 433 
 434     num_contexts_to_create = mca_btl_ofi_component.num_contexts_per_module;
 435 
 436     /* If the domain support scalable endpoint. */
 437     if (domain_attr->max_ep_tx_ctx > 1 && !disable_sep) {
 438 
 439         BTL_VERBOSE(("btl/ofi using scalable endpoint."));
 440 
 441         if (num_contexts_to_create > domain_attr->max_ep_tx_ctx) {
 442             BTL_VERBOSE(("cannot create requested %u contexts. (node max=%zu)",
 443                             module->num_contexts,
 444                             domain_attr->max_ep_tx_ctx));
 445             goto fail;
 446          }
 447 
 448         /* modify the info to let the provider know we are creating x contexts */
 449         ep_attr->tx_ctx_cnt = num_contexts_to_create;
 450         ep_attr->rx_ctx_cnt = num_contexts_to_create;
 451 
 452         /* create scalable endpoint */
 453         rc = fi_scalable_ep(domain, ofi_info, &ep, NULL);
 454         if (0 != rc) {
 455             BTL_VERBOSE(("%s failed fi_scalable_ep with err=%s",
 456                             linux_device_name,
 457                             fi_strerror(-rc)
 458                             ));
 459             goto fail;
 460         }
 461 
 462         module->num_contexts = num_contexts_to_create;
 463         module->is_scalable_ep = true;
 464 
 465         /* create contexts */
 466         module->contexts = mca_btl_ofi_context_alloc_scalable(ofi_info,
 467                                 domain, ep, av,
 468                                 num_contexts_to_create);
 469 
 470    } else {
 471         /* warn the user if they want more than 1 context */
 472         if (num_contexts_to_create > 1) {
 473             BTL_ERROR(("cannot create %zu contexts as the provider does not support "
 474                         "scalable endpoint. Falling back to single context endpoint.",
 475                         num_contexts_to_create));
 476         }
 477 
 478         BTL_VERBOSE(("btl/ofi using normal endpoint."));
 479 
 480         rc = fi_endpoint(domain, ofi_info, &ep, NULL);
 481         if (0 != rc) {
 482             BTL_VERBOSE(("%s failed fi_endpoint with err=%s",
 483                             linux_device_name,
 484                             fi_strerror(-rc)
 485                             ));
 486             goto fail;
 487         }
 488 
 489         module->num_contexts = 1;
 490         module->is_scalable_ep = false;
 491 
 492         /* create contexts */
 493         module->contexts = mca_btl_ofi_context_alloc_normal(ofi_info,
 494                                                             domain, ep, av);
 495     }
 496 
 497     if (NULL == module->contexts) {
 498         /* error message is already printed */
 499         goto fail;
 500     }
 501 
 502     /* enable the endpoint for using */
 503     rc = fi_enable(ep);
 504     if (0 != rc) {
 505         BTL_VERBOSE(("%s failed fi_enable with err=%s",
 506                         linux_device_name,
 507                         fi_strerror(-rc)
 508                         ));
 509         goto fail;
 510     }
 511 
 512     /* Everything succeeded, lets create a module for this device. */
 513     /* store the information. */
 514     module->fabric_info = ofi_info;
 515     module->fabric = fabric;
 516     module->domain = domain;
 517     module->av = av;
 518     module->ofi_endpoint = ep;
 519     module->linux_device_name = linux_device_name;
 520     module->outstanding_rdma = 0;
 521     module->use_virt_addr = false;
 522 
 523     if (ofi_info->domain_attr->mr_mode == FI_MR_BASIC ||
 524         ofi_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) {
 525         module->use_virt_addr = true;
 526     }
 527 
 528     /* initialize the rcache */
 529     mca_btl_ofi_rcache_init(module);
 530 
 531     /* create endpoint list */
 532     OBJ_CONSTRUCT(&module->endpoints, opal_list_t);
 533     OBJ_CONSTRUCT(&module->module_lock, opal_mutex_t);
 534     OBJ_CONSTRUCT(&module->id_to_endpoint, opal_hash_table_t);
 535 
 536     rc = opal_hash_table_init (&module->id_to_endpoint, 512);
 537     if (OPAL_SUCCESS != rc) {
 538         BTL_ERROR(("error initializing hash table."));
 539         goto fail;
 540     }
 541 
 542     /* create and send the modex for this device */
 543     namelen = sizeof(ep_name);
 544     rc = fi_getname((fid_t)ep, &ep_name[0], &namelen);
 545     if (0 != rc) {
 546         BTL_VERBOSE(("%s failed fi_getname with err=%s",
 547                         linux_device_name,
 548                         fi_strerror(-rc)
 549                         ));
 550         goto fail;
 551     }
 552 
 553 
 554     /* If we have two-sided support. */
 555     if (TWO_SIDED_ENABLED) {
 556 
 557         /* post wildcard recvs */
 558         for (int i=0; i < module->num_contexts; i++) {
 559             rc = mca_btl_ofi_post_recvs((mca_btl_base_module_t*) module,
 560                                         &module->contexts[i],
 561                                         mca_btl_ofi_component.rd_num);
 562             if (OPAL_SUCCESS != rc) {
 563                 goto fail;
 564             }
 565         }
 566     }
 567 
 568     /* post our endpoint name so peer can use it to connect to us */
 569     OPAL_MODEX_SEND(rc,
 570                     OPAL_PMIX_GLOBAL,
 571                     &mca_btl_ofi_component.super.btl_version,
 572                     &ep_name,
 573                     namelen);
 574     mca_btl_ofi_component.namelen = namelen;
 575 
 576     /* add this module to the list */
 577     mca_btl_ofi_component.modules[(*module_count)++] = module;
 578 
 579     return OPAL_SUCCESS;
 580 
 581 fail:
 582     /* clean up */
 583 
 584     /* if the contexts have not been initiated, num_contexts should
 585      * be zero and we skip this. */
 586     for (int i=0; i < module->num_contexts; i++) {
 587         mca_btl_ofi_context_finalize(&module->contexts[i], module->is_scalable_ep);
 588     }
 589     free(module->contexts);
 590 
 591     if (NULL != av) {
 592         fi_close(&av->fid);
 593     }
 594 
 595     if (NULL != ep) {
 596         fi_close(&ep->fid);
 597     }
 598 
 599     if (NULL != domain) {
 600         fi_close(&domain->fid);
 601     }
 602 
 603     if (NULL != fabric) {
 604         fi_close(&fabric->fid);
 605     }
 606     free(module);
 607 
 608     /* not really a failure. just skip this device. */
 609     return OPAL_ERR_OUT_OF_RESOURCE;
 610 }
 611 
 612 /**
 613  * @brief OFI BTL progress function
 614  *
 615  * This function explictly progresses all workers.
 616  */
 617 static int mca_btl_ofi_component_progress (void)
 618 {
 619     int events = 0;
 620     mca_btl_ofi_context_t *context;
 621 
 622     for (int i = 0 ; i < mca_btl_ofi_component.module_count ; ++i) {
 623         mca_btl_ofi_module_t *module = mca_btl_ofi_component.modules[i];
 624 
 625         /* progress context we own first. */
 626         context = get_ofi_context(module);
 627 
 628         if (mca_btl_ofi_context_trylock(context)) {
 629             events += mca_btl_ofi_context_progress(context);
 630             mca_btl_ofi_context_unlock(context);
 631         }
 632 
 633         /* if there is nothing to do, try progress other's. */
 634         if (events == 0) {
 635             for (int j = 0 ; j < module->num_contexts ; j++ ) {
 636 
 637                 context = get_ofi_context_rr(module);
 638 
 639                 if (mca_btl_ofi_context_trylock(context)) {
 640                     events += mca_btl_ofi_context_progress(context);
 641                     mca_btl_ofi_context_unlock(context);
 642                 }
 643 
 644                 /* If we did something, good enough. return now.
 645                  * This is crucial for performance/latency. */
 646                 if (events > 0) {
 647                     break;
 648                 }
 649             }
 650         }
 651     }
 652 
 653     return events;
 654 }
 655 
 656 /** OFI btl component */
 657 mca_btl_ofi_component_t mca_btl_ofi_component = {
 658     .super = {
 659         .btl_version = {
 660             MCA_BTL_DEFAULT_VERSION("ofi"),
 661             .mca_open_component = mca_btl_ofi_component_open,
 662             .mca_close_component = mca_btl_ofi_component_close,
 663             .mca_register_component_params = mca_btl_ofi_component_register,
 664         },
 665         .btl_data = {
 666             /* The component is not checkpoint ready */
 667             .param_field = MCA_BASE_METADATA_PARAM_NONE
 668         },
 669 
 670         .btl_init = mca_btl_ofi_component_init,
 671         .btl_progress = mca_btl_ofi_component_progress,
 672     },
 673 };

/* [<][>][^][v][top][bottom][index][help] */