root/ompi/mca/mtl/ofi/mtl_ofi_component.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. ompi_mtl_ofi_component_register
  2. ompi_mtl_ofi_component_open
  3. ompi_mtl_ofi_component_query
  4. ompi_mtl_ofi_component_close
  5. ompi_mtl_ofi_progress_no_inline
  6. is_in_list
  7. select_ofi_provider
  8. ompi_mtl_ofi_check_fi_remote_cq_data
  9. ompi_mtl_ofi_define_tag_mode
  10. ompi_mtl_ofi_init_sep
  11. ompi_mtl_ofi_init_regular_ep
  12. ompi_mtl_ofi_component_init
  13. ompi_mtl_ofi_finalize

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2013-2018 Intel, Inc. All rights reserved
   4  *
   5  * Copyright (c) 2014-2017 Cisco Systems, Inc.  All rights reserved
   6  * Copyright (c) 2015-2016 Los Alamos National Security, LLC.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2018      Amazon.com, Inc. or its affiliates.  All Rights reserved.
   9  * $COPYRIGHT$
  10  *
  11  * Additional copyrights may follow
  12  *
  13  * $HEADER$
  14  */
  15 
  16 #include "mtl_ofi.h"
  17 #include "opal/util/argv.h"
  18 #include "opal/util/printf.h"
  19 
  20 static int ompi_mtl_ofi_component_open(void);
  21 static int ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority);
  22 static int ompi_mtl_ofi_component_close(void);
  23 static int ompi_mtl_ofi_component_register(void);
  24 
  25 static mca_mtl_base_module_t*
  26 ompi_mtl_ofi_component_init(bool enable_progress_threads,
  27                             bool enable_mpi_threads);
  28 
  29 static int param_priority;
  30 static char *prov_include;
  31 static char *prov_exclude;
  32 static int control_progress;
  33 static int data_progress;
  34 static int av_type;
  35 static int ofi_tag_mode;
  36 
  37 #if OPAL_HAVE_THREAD_LOCAL
  38     opal_thread_local int per_thread_ctx;
  39     opal_thread_local struct fi_cq_tagged_entry wc[MTL_OFI_MAX_PROG_EVENT_COUNT];
  40 #endif
  41 
  42 /*
  43  * Enumerators
  44  */
  45 
  46 enum {
  47     MTL_OFI_PROG_AUTO=1,
  48     MTL_OFI_PROG_MANUAL,
  49     MTL_OFI_PROG_UNSPEC,
  50 };
  51 
  52 mca_base_var_enum_value_t control_prog_type[] = {
  53     {MTL_OFI_PROG_AUTO, "auto"},
  54     {MTL_OFI_PROG_MANUAL, "manual"},
  55     {MTL_OFI_PROG_UNSPEC, "unspec"},
  56     {0, NULL}
  57 };
  58 
  59 mca_base_var_enum_value_t data_prog_type[] = {
  60     {MTL_OFI_PROG_AUTO, "auto"},
  61     {MTL_OFI_PROG_MANUAL, "manual"},
  62     {MTL_OFI_PROG_UNSPEC, "unspec"},
  63     {0, NULL}
  64 };
  65 
  66 enum {
  67     MTL_OFI_AV_MAP=1,
  68     MTL_OFI_AV_TABLE,
  69     MTL_OFI_AV_UNKNOWN,
  70 };
  71 
  72 mca_base_var_enum_value_t av_table_type[] = {
  73     {MTL_OFI_AV_MAP, "map"},
  74     {MTL_OFI_AV_TABLE, "table"},
  75     {0, NULL}
  76 };
  77 
  78 enum {
  79     MTL_OFI_TAG_AUTO=1,
  80     MTL_OFI_TAG_1,
  81     MTL_OFI_TAG_2,
  82     MTL_OFI_TAG_FULL,
  83 };
  84 
  85 mca_base_var_enum_value_t ofi_tag_mode_type[] = {
  86     {MTL_OFI_TAG_AUTO, "auto"},
  87     {MTL_OFI_TAG_1, "ofi_tag_1"},
  88     {MTL_OFI_TAG_2, "ofi_tag_2"},
  89     {MTL_OFI_TAG_FULL, "ofi_tag_full"},
  90     {0, NULL}
  91 };
  92 
  93 mca_mtl_ofi_component_t mca_mtl_ofi_component = {
  94     {
  95 
  96         /* First, the mca_base_component_t struct containing meta
  97          * information about the component itself */
  98 
  99         .mtl_version = {
 100             MCA_MTL_BASE_VERSION_2_0_0,
 101 
 102             .mca_component_name = "ofi",
 103             OFI_COMPAT_MCA_VERSION,
 104             .mca_open_component = ompi_mtl_ofi_component_open,
 105             .mca_close_component = ompi_mtl_ofi_component_close,
 106             .mca_query_component = ompi_mtl_ofi_component_query,
 107             .mca_register_component_params = ompi_mtl_ofi_component_register,
 108         },
 109         .mtl_data = {
 110             /* The component is not checkpoint ready */
 111             MCA_BASE_METADATA_PARAM_NONE
 112         },
 113 
 114         .mtl_init = ompi_mtl_ofi_component_init,
 115     }
 116 };
 117 
 118 static int
 119 ompi_mtl_ofi_component_register(void)
 120 {
 121     int ret;
 122     mca_base_var_enum_t *new_enum = NULL;
 123     char *desc;
 124 
 125     param_priority = 25;   /* for now give a lower priority than the psm mtl */
 126     mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
 127                                     "priority", "Priority of the OFI MTL component",
 128                                     MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 129                                     OPAL_INFO_LVL_9,
 130                                     MCA_BASE_VAR_SCOPE_READONLY,
 131                                     &param_priority);
 132 
 133     prov_include = NULL;
 134     mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
 135                                     "provider_include",
 136                                     "Comma-delimited list of OFI providers that are considered for use (e.g., \"psm,psm2\"; an empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_exclude.",
 137                                     MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 138                                     OPAL_INFO_LVL_1,
 139                                     MCA_BASE_VAR_SCOPE_READONLY,
 140                                     &prov_include);
 141 
 142     prov_exclude = "shm,sockets,tcp,udp,rstream";
 143     mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
 144                                     "provider_exclude",
 145                                     "Comma-delimited list of OFI providers that are not considered for use (default: \"sockets,mxm\"; empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_include.",
 146                                     MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 147                                     OPAL_INFO_LVL_1,
 148                                     MCA_BASE_VAR_SCOPE_READONLY,
 149                                     &prov_exclude);
 150 
 151     ompi_mtl_ofi.ofi_progress_event_count = MTL_OFI_MAX_PROG_EVENT_COUNT;
 152     opal_asprintf(&desc, "Max number of events to read each call to OFI progress (default: %d events will be read per OFI progress call)", ompi_mtl_ofi.ofi_progress_event_count);
 153     mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
 154                                     "progress_event_cnt",
 155                                     desc,
 156                                     MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 157                                     OPAL_INFO_LVL_6,
 158                                     MCA_BASE_VAR_SCOPE_READONLY,
 159                                     &ompi_mtl_ofi.ofi_progress_event_count);
 160 
 161     free(desc);
 162 
 163     ret = mca_base_var_enum_create ("ofi_tag_mode_type", ofi_tag_mode_type , &new_enum);
 164     if (OPAL_SUCCESS != ret) {
 165         return ret;
 166     }
 167 
 168     ofi_tag_mode = MTL_OFI_TAG_AUTO;
 169     opal_asprintf(&desc, "Mode specifying how many bits to use for various MPI values in OFI/Libfabric"
 170             " communications. Some Libfabric provider network types can support most of Open MPI"
 171             " needs; others can only supply a limited number of bits, which then must be split"
 172             " across the MPI communicator ID, MPI source rank, and MPI tag. Three different"
 173             " splitting schemes are available: ofi_tag_full (%d bits for the communicator, %d bits"
 174             " for the source rank, and %d bits for the tag), ofi_tag_1 (%d bits for the communicator"
 175             ", %d bits source rank, %d bits tag), ofi_tag_2 (%d bits for the communicator"
 176             ", %d bits source rank, %d bits tag). By default, this MCA variable is set to \"auto\","
 177             " which will first try to use ofi_tag_full, and if that fails, fall back to ofi_tag_1.",
 178             MTL_OFI_CID_BIT_COUNT_DATA, 32, MTL_OFI_TAG_BIT_COUNT_DATA,
 179             MTL_OFI_CID_BIT_COUNT_1, MTL_OFI_SOURCE_BIT_COUNT_1, MTL_OFI_TAG_BIT_COUNT_1,
 180             MTL_OFI_CID_BIT_COUNT_2, MTL_OFI_SOURCE_BIT_COUNT_2, MTL_OFI_TAG_BIT_COUNT_2);
 181 
 182     mca_base_component_var_register (&mca_mtl_ofi_component.super.mtl_version,
 183                                     "tag_mode",
 184                                      desc,
 185                                      MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
 186                                      OPAL_INFO_LVL_6,
 187                                      MCA_BASE_VAR_SCOPE_READONLY,
 188                                      &ofi_tag_mode);
 189 
 190     free(desc);
 191     OBJ_RELEASE(new_enum);
 192 
 193     ret = mca_base_var_enum_create ("control_prog_type", control_prog_type, &new_enum);
 194     if (OPAL_SUCCESS != ret) {
 195         return ret;
 196     }
 197 
 198     control_progress = MTL_OFI_PROG_UNSPEC;
 199     mca_base_component_var_register (&mca_mtl_ofi_component.super.mtl_version,
 200                                      "control_progress",
 201                                      "Specify control progress model (default: unspecificed, use provider's default). Set to auto or manual for auto or manual progress respectively.",
 202                                      MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
 203                                      OPAL_INFO_LVL_3,
 204                                      MCA_BASE_VAR_SCOPE_READONLY,
 205                                      &control_progress);
 206     OBJ_RELEASE(new_enum);
 207 
 208     ret = mca_base_var_enum_create ("data_prog_type", data_prog_type, &new_enum);
 209     if (OPAL_SUCCESS != ret) {
 210         return ret;
 211     }
 212 
 213     data_progress = MTL_OFI_PROG_UNSPEC;
 214     mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
 215                                     "data_progress",
 216                                     "Specify data progress model (default: unspecified, use provider's default). Set to auto or manual for auto or manual progress respectively.",
 217                                     MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
 218                                     OPAL_INFO_LVL_3,
 219                                     MCA_BASE_VAR_SCOPE_READONLY,
 220                                     &data_progress);
 221     OBJ_RELEASE(new_enum);
 222 
 223     ret = mca_base_var_enum_create ("av_type", av_table_type, &new_enum);
 224     if (OPAL_SUCCESS != ret) {
 225         return ret;
 226     }
 227 
 228     av_type = MTL_OFI_AV_MAP;
 229     mca_base_component_var_register (&mca_mtl_ofi_component.super.mtl_version,
 230                                      "av",
 231                                      "Specify AV type to use (default: map). Set to table for FI_AV_TABLE AV type.",
 232                                      MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
 233                                      OPAL_INFO_LVL_3,
 234                                      MCA_BASE_VAR_SCOPE_READONLY,
 235                                      &av_type);
 236     OBJ_RELEASE(new_enum);
 237 
 238     ompi_mtl_ofi.enable_sep = 0;
 239     mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
 240                                     "enable_sep",
 241                                     "Enable SEP feature",
 242                                     MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 243                                     OPAL_INFO_LVL_3,
 244                                     MCA_BASE_VAR_SCOPE_READONLY,
 245                                     &ompi_mtl_ofi.enable_sep);
 246 
 247     ompi_mtl_ofi.thread_grouping = 0;
 248     mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
 249                                     "thread_grouping",
 250                                     "Enable/Disable Thread Grouping feature",
 251                                     MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 252                                     OPAL_INFO_LVL_3,
 253                                     MCA_BASE_VAR_SCOPE_READONLY,
 254                                     &ompi_mtl_ofi.thread_grouping);
 255 
 256     /*
 257      * Default Policy: Create 1 context and let user ask for more for
 258      * multi-threaded workloads. User needs to ask for as many contexts as the
 259      * number of threads that are anticipated to make MPI calls.
 260      */
 261     ompi_mtl_ofi.num_ofi_contexts = 1;
 262     mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
 263                                     "num_ctxts",
 264                                     "Specify number of OFI contexts to create",
 265                                     MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 266                                     OPAL_INFO_LVL_4,
 267                                     MCA_BASE_VAR_SCOPE_READONLY,
 268                                     &ompi_mtl_ofi.num_ofi_contexts);
 269 
 270     return OMPI_SUCCESS;
 271 }
 272 
 273 
 274 
 275 static int
 276 ompi_mtl_ofi_component_open(void)
 277 {
 278     ompi_mtl_ofi.base.mtl_request_size =
 279         sizeof(ompi_mtl_ofi_request_t) - sizeof(struct mca_mtl_request_t);
 280 
 281     ompi_mtl_ofi.domain =  NULL;
 282     ompi_mtl_ofi.av     =  NULL;
 283     ompi_mtl_ofi.sep     =  NULL;
 284 
 285     /**
 286      * Sanity check: provider_include and provider_exclude must be mutually
 287      * exclusive
 288      */
 289     if (OMPI_SUCCESS !=
 290         mca_base_var_check_exclusive("ompi",
 291             mca_mtl_ofi_component.super.mtl_version.mca_type_name,
 292             mca_mtl_ofi_component.super.mtl_version.mca_component_name,
 293             "provider_include",
 294             mca_mtl_ofi_component.super.mtl_version.mca_type_name,
 295             mca_mtl_ofi_component.super.mtl_version.mca_component_name,
 296             "provider_exclude")) {
 297         return OMPI_ERR_NOT_AVAILABLE;
 298     }
 299 
 300     return OMPI_SUCCESS;
 301 }
 302 
 303 static int
 304 ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority)
 305 {
 306     *priority = param_priority;
 307     *module = (mca_base_module_t *)&ompi_mtl_ofi.base;
 308     return OMPI_SUCCESS;
 309 }
 310 
 311 static int
 312 ompi_mtl_ofi_component_close(void)
 313 {
 314     return OMPI_SUCCESS;
 315 }
 316 
 317 int
 318 ompi_mtl_ofi_progress_no_inline(void)
 319 {
 320         return ompi_mtl_ofi_progress();
 321 }
 322 
 323 static int
 324 is_in_list(char **list, char *item)
 325 {
 326     int i = 0;
 327 
 328     if ((NULL == list) || (NULL == item)) {
 329         return 0;
 330     }
 331 
 332     while (NULL != list[i]) {
 333         if (0 == strncmp(item, list[i], strlen(list[i]))) {
 334             return 1;
 335         } else {
 336             i++;
 337         }
 338     }
 339 
 340     return 0;
 341 }
 342 
 343 static struct fi_info*
 344 select_ofi_provider(struct fi_info *providers)
 345 {
 346     char **include_list = NULL;
 347     char **exclude_list = NULL;
 348     struct fi_info *prov = providers;
 349 
 350     opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 351                         "%s:%d: mtl:ofi:provider_include = \"%s\"\n",
 352                         __FILE__, __LINE__, prov_include);
 353     opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 354                         "%s:%d: mtl:ofi:provider_exclude = \"%s\"\n",
 355                         __FILE__, __LINE__, prov_exclude);
 356 
 357     if (NULL != prov_include) {
 358         include_list = opal_argv_split(prov_include, ',');
 359         while ((NULL != prov) &&
 360                (!is_in_list(include_list, prov->fabric_attr->prov_name))) {
 361             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 362                                 "%s:%d: mtl:ofi: \"%s\" not in include list\n",
 363                                 __FILE__, __LINE__,
 364                                 prov->fabric_attr->prov_name);
 365             prov = prov->next;
 366         }
 367     } else if (NULL != prov_exclude) {
 368         exclude_list = opal_argv_split(prov_exclude, ',');
 369         while ((NULL != prov) &&
 370                (is_in_list(exclude_list, prov->fabric_attr->prov_name))) {
 371             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 372                                 "%s:%d: mtl:ofi: \"%s\" in exclude list\n",
 373                                 __FILE__, __LINE__,
 374                                 prov->fabric_attr->prov_name);
 375             prov = prov->next;
 376         }
 377     }
 378 
 379     opal_argv_free(include_list);
 380     opal_argv_free(exclude_list);
 381 
 382     opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 383                         "%s:%d: mtl:ofi:prov: %s\n",
 384                         __FILE__, __LINE__,
 385                         (prov ? prov->fabric_attr->prov_name : "none"));
 386 
 387     return prov;
 388 }
 389 
 390 /* Check if FI_REMOTE_CQ_DATA is supported, if so send the source rank there
 391  * FI_DIRECTED_RECV is also needed so receives can discrimate the source
 392  */
 393 static int
 394 ompi_mtl_ofi_check_fi_remote_cq_data(int fi_version,
 395                                      struct fi_info *hints,
 396                                      struct fi_info *provider,
 397                                      struct fi_info **prov_cq_data)
 398 {
 399     int ret;
 400     char *provider_name;
 401     struct fi_info *hints_dup;
 402     hints_dup = fi_dupinfo(hints);
 403 
 404     provider_name = strdup(provider->fabric_attr->prov_name);
 405     hints_dup->fabric_attr->prov_name = provider_name;
 406     hints_dup->caps |= FI_TAGGED | FI_DIRECTED_RECV;
 407     /* Ask for the size that OMPI uses for the source rank number */
 408     hints_dup->domain_attr->cq_data_size = sizeof(int);
 409     ret = fi_getinfo(fi_version, NULL, NULL, 0ULL, hints_dup, prov_cq_data);
 410 
 411     if ((0 != ret) && (-FI_ENODATA != ret)) {
 412         opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
 413                        "fi_getinfo",
 414                        ompi_process_info.nodename, __FILE__, __LINE__,
 415                        fi_strerror(-ret), -ret);
 416         return ret;
 417     } else if (-FI_ENODATA == ret) {
 418         /* The provider does not support  FI_REMOTE_CQ_DATA */
 419         prov_cq_data = NULL;
 420     }
 421 
 422     fi_freeinfo(hints_dup);
 423     return OMPI_SUCCESS;
 424 }
 425 
 426 static void
 427 ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode, int *bits_for_cid) {
 428     switch (ofi_tag_mode) {
 429         case MTL_OFI_TAG_1:
 430             *bits_for_cid = (int) MTL_OFI_CID_BIT_COUNT_1;
 431             ompi_mtl_ofi.base.mtl_max_tag = (int)((1ULL << (MTL_OFI_TAG_BIT_COUNT_1 - 1)) - 1);
 432 
 433             ompi_mtl_ofi.source_rank_tag_mask = MTL_OFI_SOURCE_TAG_MASK_1;
 434             ompi_mtl_ofi.num_bits_source_rank = MTL_OFI_SOURCE_BIT_COUNT_1;
 435             ompi_mtl_ofi.source_rank_mask = MTL_OFI_SOURCE_MASK_1;
 436 
 437             ompi_mtl_ofi.mpi_tag_mask = MTL_OFI_TAG_MASK_1;
 438             ompi_mtl_ofi.num_bits_mpi_tag = MTL_OFI_TAG_BIT_COUNT_1;
 439 
 440             ompi_mtl_ofi.sync_send = MTL_OFI_SYNC_SEND_1;
 441             ompi_mtl_ofi.sync_send_ack = MTL_OFI_SYNC_SEND_ACK_1;
 442             ompi_mtl_ofi.sync_proto_mask = MTL_OFI_PROTO_MASK_1;
 443         break;
 444         case MTL_OFI_TAG_2:
 445             *bits_for_cid = (int) MTL_OFI_CID_BIT_COUNT_2;
 446             ompi_mtl_ofi.base.mtl_max_tag = (int)((1ULL << (MTL_OFI_TAG_BIT_COUNT_2 - 1)) - 1);
 447 
 448             ompi_mtl_ofi.source_rank_tag_mask = MTL_OFI_SOURCE_TAG_MASK_2;
 449             ompi_mtl_ofi.num_bits_source_rank = MTL_OFI_SOURCE_BIT_COUNT_2;
 450             ompi_mtl_ofi.source_rank_mask = MTL_OFI_SOURCE_MASK_2;
 451 
 452             ompi_mtl_ofi.mpi_tag_mask = MTL_OFI_TAG_MASK_2;
 453             ompi_mtl_ofi.num_bits_mpi_tag = MTL_OFI_TAG_BIT_COUNT_2;
 454 
 455             ompi_mtl_ofi.sync_send = MTL_OFI_SYNC_SEND_2;
 456             ompi_mtl_ofi.sync_send_ack = MTL_OFI_SYNC_SEND_ACK_2;
 457             ompi_mtl_ofi.sync_proto_mask = MTL_OFI_PROTO_MASK_2;
 458         break;
 459         default: /* use FI_REMOTE_CQ_DATA */
 460             *bits_for_cid = (int) MTL_OFI_CID_BIT_COUNT_DATA;
 461             ompi_mtl_ofi.base.mtl_max_tag = (int)((1ULL << (MTL_OFI_TAG_BIT_COUNT_DATA - 1)) - 1);
 462 
 463             ompi_mtl_ofi.mpi_tag_mask = MTL_OFI_TAG_MASK_DATA;
 464 
 465             ompi_mtl_ofi.sync_send = MTL_OFI_SYNC_SEND_DATA;
 466             ompi_mtl_ofi.sync_send_ack = MTL_OFI_SYNC_SEND_ACK_DATA;
 467             ompi_mtl_ofi.sync_proto_mask = MTL_OFI_PROTO_MASK_DATA;
 468     }
 469 }
 470 
 471 #define MTL_OFI_ALLOC_COMM_TO_CONTEXT(arr_size)                                         \
 472     do {                                                                                \
 473         ompi_mtl_ofi.comm_to_context = calloc(arr_size, sizeof(int));                   \
 474         if (OPAL_UNLIKELY(!ompi_mtl_ofi.comm_to_context)) {                             \
 475             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,            \
 476                                    "%s:%d: alloc of comm_to_context array failed: %s\n",\
 477                                    __FILE__, __LINE__, strerror(errno));                \
 478             return ret;                                                                 \
 479         }                                                                               \
 480     } while (0);
 481 
 482 #define MTL_OFI_ALLOC_OFI_CTXTS()                                                           \
 483     do {                                                                                    \
 484         ompi_mtl_ofi.ofi_ctxt = (mca_mtl_ofi_context_t *) malloc(ompi_mtl_ofi.num_ofi_contexts * \
 485                                                           sizeof(mca_mtl_ofi_context_t));   \
 486         if (OPAL_UNLIKELY(!ompi_mtl_ofi.ofi_ctxt)) {                                        \
 487             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,                \
 488                                    "%s:%d: alloc of ofi_ctxt array failed: %s\n",           \
 489                                    __FILE__, __LINE__, strerror(errno));                    \
 490             return ret;                                                                     \
 491         }                                                                                   \
 492     } while(0);
 493 
 494 static int ompi_mtl_ofi_init_sep(struct fi_info *prov, int universe_size)
 495 {
 496     int ret = OMPI_SUCCESS, num_ofi_ctxts;
 497     struct fi_av_attr av_attr = {0};
 498 
 499     prov->ep_attr->tx_ctx_cnt = prov->ep_attr->rx_ctx_cnt =
 500                                 ompi_mtl_ofi.num_ofi_contexts;
 501 
 502     ret = fi_scalable_ep(ompi_mtl_ofi.domain, prov, &ompi_mtl_ofi.sep, NULL);
 503     if (0 != ret) {
 504         opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
 505                        "fi_scalable_ep",
 506                        ompi_process_info.nodename, __FILE__, __LINE__,
 507                        fi_strerror(-ret), -ret);
 508         return ret;
 509     }
 510 
 511     ompi_mtl_ofi.rx_ctx_bits = 0;
 512     while (ompi_mtl_ofi.num_ofi_contexts >> ++ompi_mtl_ofi.rx_ctx_bits);
 513 
 514     av_attr.type = (MTL_OFI_AV_TABLE == av_type) ? FI_AV_TABLE: FI_AV_MAP;
 515     av_attr.rx_ctx_bits = ompi_mtl_ofi.rx_ctx_bits;
 516     av_attr.count = ompi_mtl_ofi.num_ofi_contexts * universe_size;
 517     ret = fi_av_open(ompi_mtl_ofi.domain, &av_attr, &ompi_mtl_ofi.av, NULL);
 518 
 519     if (0 != ret) {
 520         MTL_OFI_LOG_FI_ERR(ret, "fi_av_open failed");
 521         return ret;
 522     }
 523 
 524     ret = fi_scalable_ep_bind(ompi_mtl_ofi.sep, (fid_t)ompi_mtl_ofi.av, 0);
 525     if (0 != ret) {
 526         MTL_OFI_LOG_FI_ERR(ret, "fi_bind AV-EP failed");
 527         return ret;
 528     }
 529 
 530     /*
 531      * If SEP supported and Thread Grouping feature enabled, use
 532      * num_ofi_contexts + 2. Extra 2 items is to accomodate Open MPI contextid
 533      * numbering- COMM_WORLD is 0, COMM_SELF is 1. Other user created
 534      * Comm contextid values are assigned sequentially starting with 3.
 535      */
 536     num_ofi_ctxts = ompi_mtl_ofi.thread_grouping ?
 537                 ompi_mtl_ofi.num_ofi_contexts + 2 : 1;
 538     MTL_OFI_ALLOC_COMM_TO_CONTEXT(num_ofi_ctxts);
 539 
 540     ompi_mtl_ofi.total_ctxts_used = 0;
 541     ompi_mtl_ofi.threshold_comm_context_id = 0;
 542 
 543     /* Allocate memory for OFI contexts */
 544     MTL_OFI_ALLOC_OFI_CTXTS();
 545 
 546     return ret;
 547 }
 548 
 549 static int ompi_mtl_ofi_init_regular_ep(struct fi_info * prov, int universe_size)
 550 {
 551     int ret = OMPI_SUCCESS;
 552     struct fi_av_attr av_attr = {0};
 553     struct fi_cq_attr cq_attr = {0};
 554     cq_attr.format = FI_CQ_FORMAT_TAGGED;
 555     cq_attr.size = ompi_mtl_ofi.ofi_progress_event_count;
 556 
 557     /* Override any user defined setting */
 558     ompi_mtl_ofi.num_ofi_contexts = 1;
 559     ret = fi_endpoint(ompi_mtl_ofi.domain, /* In:  Domain object   */
 560                       prov,                /* In:  Provider        */
 561                       &ompi_mtl_ofi.sep,    /* Out: Endpoint object */
 562                       NULL);               /* Optional context     */
 563     if (0 != ret) {
 564         opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
 565                        "fi_endpoint",
 566                        ompi_process_info.nodename, __FILE__, __LINE__,
 567                        fi_strerror(-ret), -ret);
 568         return ret;
 569     }
 570 
 571     /**
 572      * Create the objects that will be bound to the endpoint.
 573      * The objects include:
 574      *     - address vector and completion queues
 575      */
 576     av_attr.type = (MTL_OFI_AV_TABLE == av_type) ? FI_AV_TABLE: FI_AV_MAP;
 577     av_attr.count = universe_size;
 578     ret = fi_av_open(ompi_mtl_ofi.domain, &av_attr, &ompi_mtl_ofi.av, NULL);
 579     if (ret) {
 580         MTL_OFI_LOG_FI_ERR(ret, "fi_av_open failed");
 581         return ret;
 582     }
 583 
 584     ret = fi_ep_bind(ompi_mtl_ofi.sep,
 585                      (fid_t)ompi_mtl_ofi.av,
 586                      0);
 587     if (0 != ret) {
 588         MTL_OFI_LOG_FI_ERR(ret, "fi_bind AV-EP failed");
 589         return ret;
 590     }
 591 
 592     MTL_OFI_ALLOC_COMM_TO_CONTEXT(1);
 593 
 594     /* Allocate memory for OFI contexts */
 595     MTL_OFI_ALLOC_OFI_CTXTS();
 596 
 597     ompi_mtl_ofi.ofi_ctxt[0].tx_ep = ompi_mtl_ofi.sep;
 598     ompi_mtl_ofi.ofi_ctxt[0].rx_ep = ompi_mtl_ofi.sep;
 599 
 600     ret = fi_cq_open(ompi_mtl_ofi.domain, &cq_attr, &ompi_mtl_ofi.ofi_ctxt[0].cq, NULL);
 601     if (ret) {
 602         MTL_OFI_LOG_FI_ERR(ret, "fi_cq_open failed");
 603         return ret;
 604     }
 605 
 606     /* Bind CQ to endpoint object */
 607     ret = fi_ep_bind(ompi_mtl_ofi.sep, (fid_t)ompi_mtl_ofi.ofi_ctxt[0].cq,
 608                      FI_TRANSMIT | FI_RECV | FI_SELECTIVE_COMPLETION);
 609     if (0 != ret) {
 610         MTL_OFI_LOG_FI_ERR(ret, "fi_bind CQ-EP failed");
 611         return ret;
 612     }
 613 
 614     return ret;
 615 }
 616 
 617 static mca_mtl_base_module_t*
 618 ompi_mtl_ofi_component_init(bool enable_progress_threads,
 619                             bool enable_mpi_threads)
 620 {
 621     int ret, fi_version;
 622     int num_local_ranks, sep_support_in_provider, max_ofi_ctxts;
 623     int ofi_tag_leading_zeros, ofi_tag_bits_for_cid;
 624     struct fi_info *hints;
 625     struct fi_info *providers = NULL;
 626     struct fi_info *prov = NULL;
 627     struct fi_info *prov_cq_data = NULL;
 628     char ep_name[FI_NAME_MAX] = {0};
 629     size_t namelen;
 630     int universe_size;
 631     char *univ_size_str;
 632 
 633     /**
 634      * Hints to filter providers
 635      * See man fi_getinfo for a list of all filters
 636      * mode:  Select capabilities MTL is prepared to support.
 637      *        In this case, MTL will pass in context into communication calls
 638      * ep_type:  reliable datagram operation
 639      * caps:     Capabilities required from the provider.
 640      *           Tag matching is specified to implement MPI semantics.
 641      * msg_order: Guarantee that messages with same tag are ordered.
 642      */
 643     hints = fi_allocinfo();
 644     if (!hints) {
 645         opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 646                             "%s:%d: Could not allocate fi_info\n",
 647                             __FILE__, __LINE__);
 648         goto error;
 649     }
 650     hints->mode               = FI_CONTEXT;
 651     hints->ep_attr->type      = FI_EP_RDM;      /* Reliable datagram         */
 652     hints->caps               = FI_TAGGED;      /* Tag matching interface    */
 653     hints->tx_attr->msg_order = FI_ORDER_SAS;
 654     hints->rx_attr->msg_order = FI_ORDER_SAS;
 655     hints->rx_attr->op_flags = FI_COMPLETION;
 656     hints->tx_attr->op_flags = FI_COMPLETION;
 657 
 658     if (enable_mpi_threads) {
 659         ompi_mtl_ofi.mpi_thread_multiple = true;
 660         hints->domain_attr->threading = FI_THREAD_SAFE;
 661     } else {
 662         ompi_mtl_ofi.mpi_thread_multiple = false;
 663         hints->domain_attr->threading = FI_THREAD_DOMAIN;
 664     }
 665 
 666     switch (control_progress) {
 667     case MTL_OFI_PROG_AUTO:
 668         hints->domain_attr->control_progress = FI_PROGRESS_AUTO;
 669         break;
 670     case MTL_OFI_PROG_MANUAL:
 671         hints->domain_attr->control_progress = FI_PROGRESS_MANUAL;
 672         break;
 673     default:
 674         hints->domain_attr->control_progress = FI_PROGRESS_UNSPEC;
 675     }
 676 
 677     switch (data_progress) {
 678     case MTL_OFI_PROG_AUTO:
 679         hints->domain_attr->data_progress = FI_PROGRESS_AUTO;
 680         break;
 681     case MTL_OFI_PROG_MANUAL:
 682         hints->domain_attr->data_progress = FI_PROGRESS_MANUAL;
 683         break;
 684     default:
 685         hints->domain_attr->data_progress = FI_PROGRESS_UNSPEC;
 686     }
 687 
 688     if (MTL_OFI_AV_TABLE == av_type) {
 689         hints->domain_attr->av_type          = FI_AV_TABLE;
 690     } else {
 691         hints->domain_attr->av_type          = FI_AV_MAP;
 692     }
 693 
 694     hints->domain_attr->resource_mgmt    = FI_RM_ENABLED;
 695 
 696     /**
 697      * FI_VERSION provides binary backward and forward compatibility support
 698      * Specify the version of OFI is coded to, the provider will select struct
 699      * layouts that are compatible with this version.
 700      */
 701     fi_version = FI_VERSION(1, 0);
 702 
 703     /**
 704      * fi_getinfo:  returns information about fabric  services for reaching a
 705      * remote node or service.  this does not necessarily allocate resources.
 706      * Pass NULL for name/service because we want a list of providers supported.
 707      */
 708     ret = fi_getinfo(fi_version,    /* OFI version requested                    */
 709                      NULL,          /* Optional name or fabric to resolve       */
 710                      NULL,          /* Optional service name or port to request */
 711                      0ULL,          /* Optional flag                            */
 712                      hints,         /* In: Hints to filter providers            */
 713                      &providers);   /* Out: List of matching providers          */
 714     if (FI_ENODATA == -ret) {
 715         // It is not an error if no information is returned.
 716         goto error;
 717     } else if (0 != ret) {
 718         opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
 719                        "fi_getinfo",
 720                        ompi_process_info.nodename, __FILE__, __LINE__,
 721                        fi_strerror(-ret), -ret);
 722         goto error;
 723     }
 724 
 725     /**
 726      * Select a provider from the list returned by fi_getinfo().
 727      */
 728     prov = select_ofi_provider(providers);
 729     if (!prov) {
 730         opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 731                             "%s:%d: select_ofi_provider: no provider found\n",
 732                             __FILE__, __LINE__);
 733         goto error;
 734     }
 735 
 736     /**
 737      * Select the format of the OFI tag
 738      */
 739     if ((MTL_OFI_TAG_AUTO == ofi_tag_mode) ||
 740         (MTL_OFI_TAG_FULL == ofi_tag_mode)) {
 741             ret = ompi_mtl_ofi_check_fi_remote_cq_data(fi_version,
 742                                                        hints, prov,
 743                                                        &prov_cq_data);
 744             if (OMPI_SUCCESS != ret) {
 745                 goto error;
 746             } else if (NULL == prov_cq_data) {
  747                 /* No support for FI_REMOTE_CQ_DATA */
 748                 fi_freeinfo(prov_cq_data);
 749                 ompi_mtl_ofi.fi_cq_data = false;
 750                 if (MTL_OFI_TAG_AUTO == ofi_tag_mode) {
 751                    /* Fallback to MTL_OFI_TAG_1 */
 752                    ompi_mtl_ofi_define_tag_mode(MTL_OFI_TAG_1, &ofi_tag_bits_for_cid);
 753                 } else { /* MTL_OFI_TAG_FULL */
 754                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 755                             "%s:%d: OFI provider %s does not support FI_REMOTE_CQ_DATA\n",
 756                             __FILE__, __LINE__, prov->fabric_attr->prov_name);
 757                     goto error;
 758                 }
 759             } else {
  760                 /* Use FI_REMOTE_CQ_DATA */
 761                 ompi_mtl_ofi.fi_cq_data = true;
 762                 prov = prov_cq_data;
 763                 ompi_mtl_ofi_define_tag_mode(MTL_OFI_TAG_FULL, &ofi_tag_bits_for_cid);
 764             }
 765     } else { /* MTL_OFI_TAG_1 or MTL_OFI_TAG_2 */
 766         ompi_mtl_ofi.fi_cq_data = false;
 767         ompi_mtl_ofi_define_tag_mode(ofi_tag_mode, &ofi_tag_bits_for_cid);
 768     }
 769 
 770     /**
 771      * Initialize the MTL OFI Symbol Tables & function pointers
 772      * for specialized functions.
 773      */
 774 
 775     ompi_mtl_ofi_send_symtable_init(&ompi_mtl_ofi.sym_table);
 776     ompi_mtl_ofi.base.mtl_send =
 777         ompi_mtl_ofi.sym_table.ompi_mtl_ofi_send[ompi_mtl_ofi.fi_cq_data];
 778 
 779     ompi_mtl_ofi_isend_symtable_init(&ompi_mtl_ofi.sym_table);
 780     ompi_mtl_ofi.base.mtl_isend =
 781         ompi_mtl_ofi.sym_table.ompi_mtl_ofi_isend[ompi_mtl_ofi.fi_cq_data];
 782 
 783     ompi_mtl_ofi_irecv_symtable_init(&ompi_mtl_ofi.sym_table);
 784     ompi_mtl_ofi.base.mtl_irecv =
 785         ompi_mtl_ofi.sym_table.ompi_mtl_ofi_irecv[ompi_mtl_ofi.fi_cq_data];
 786 
 787     ompi_mtl_ofi_iprobe_symtable_init(&ompi_mtl_ofi.sym_table);
 788     ompi_mtl_ofi.base.mtl_iprobe =
 789         ompi_mtl_ofi.sym_table.ompi_mtl_ofi_iprobe[ompi_mtl_ofi.fi_cq_data];
 790 
 791     ompi_mtl_ofi_improbe_symtable_init(&ompi_mtl_ofi.sym_table);
 792     ompi_mtl_ofi.base.mtl_improbe =
 793         ompi_mtl_ofi.sym_table.ompi_mtl_ofi_improbe[ompi_mtl_ofi.fi_cq_data];
 794 
 795     /**
 796      * Check for potential bits in the OFI tag that providers may be reserving
 797      * for internal usage (see mem_tag_format in fi_endpoint man page).
 798      */
 799 
 800     ofi_tag_leading_zeros = 0;
 801     while (!((prov->ep_attr->mem_tag_format << ofi_tag_leading_zeros++) &
 802            (uint64_t) MTL_OFI_HIGHEST_TAG_BIT) &&
 803            /* Do not keep looping if the provider does not support enough bits */
 804            (ofi_tag_bits_for_cid >= MTL_OFI_MINIMUM_CID_BITS)){
 805        ofi_tag_bits_for_cid--;
 806     }
 807 
 808     if (ofi_tag_bits_for_cid < MTL_OFI_MINIMUM_CID_BITS) {
 809         opal_show_help("help-mtl-ofi.txt", "Not enough bits for CID", true,
 810                        prov->fabric_attr->prov_name,
 811                        prov->fabric_attr->prov_name,
 812                        ompi_process_info.nodename, __FILE__, __LINE__);
 813         goto error;
 814     }
 815 
 816     /* Update the maximum supported Communicator ID */
 817     ompi_mtl_ofi.base.mtl_max_contextid = (int)((1ULL << ofi_tag_bits_for_cid) - 1);
 818     ompi_mtl_ofi.num_peers = 0;
 819 
 820     /* Check if Scalable Endpoints can be enabled for the provider */
 821     sep_support_in_provider = 0;
 822     if ((prov->domain_attr->max_ep_tx_ctx > 1) ||
 823         (prov->domain_attr->max_ep_rx_ctx > 1)) {
 824         sep_support_in_provider = 1;
 825     }
 826 
 827     if (1 == ompi_mtl_ofi.enable_sep) {
 828         if (0 == sep_support_in_provider) {
 829             opal_show_help("help-mtl-ofi.txt", "SEP unavailable", true,
 830                            prov->fabric_attr->prov_name,
 831                            ompi_process_info.nodename, __FILE__, __LINE__);
 832             goto error;
 833         } else if (1 == sep_support_in_provider) {
 834             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 835                                 "%s:%d: Scalable EP supported in %s provider. Enabling in MTL.\n",
 836                                 __FILE__, __LINE__, prov->fabric_attr->prov_name);
 837         }
 838     } else {
 839         /*
 840          * Scalable Endpoints is required for Thread Grouping feature
 841          */
 842         if (1 == ompi_mtl_ofi.thread_grouping) {
 843             opal_show_help("help-mtl-ofi.txt", "SEP required", true,
 844                            ompi_process_info.nodename, __FILE__, __LINE__);
 845             goto error;
 846         }
 847     }
 848 
 849     /**
 850      * Open fabric
 851      * The getinfo struct returns a fabric attribute struct that can be used to
 852      * instantiate the virtual or physical network. This opens a "fabric
 853      * provider". See man fi_fabric for details.
 854      */
 855     ret = fi_fabric(prov->fabric_attr,    /* In:  Fabric attributes             */
 856                     &ompi_mtl_ofi.fabric, /* Out: Fabric handle                 */
 857                     NULL);                /* Optional context for fabric events */
 858     if (0 != ret) {
 859         opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
 860                        "fi_fabric",
 861                        ompi_process_info.nodename, __FILE__, __LINE__,
 862                        fi_strerror(-ret), -ret);
 863         goto error;
 864     }
 865 
 866     /**
 867      * Create the access domain, which is the physical or virtual network or
 868      * hardware port/collection of ports.  Returns a domain object that can be
 869      * used to create endpoints.  See man fi_domain for details.
 870      */
 871     ret = fi_domain(ompi_mtl_ofi.fabric,  /* In:  Fabric object                 */
 872                     prov,                 /* In:  Provider                      */
 873                     &ompi_mtl_ofi.domain, /* Out: Domain oject                  */
 874                     NULL);                /* Optional context for domain events */
 875     if (0 != ret) {
 876         opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
 877                        "fi_domain",
 878                        ompi_process_info.nodename, __FILE__, __LINE__,
 879                        fi_strerror(-ret), -ret);
 880         goto error;
 881     }
 882 
 883     /**
 884      * Save the maximum inject size.
 885      */
 886     ompi_mtl_ofi.max_inject_size = prov->tx_attr->inject_size;
 887 
 888     /**
 889      * The user is not allowed to exceed MTL_OFI_MAX_PROG_EVENT_COUNT.
 890      * The reason is because progress entries array is now a TLS variable
 891      * as opposed to being allocated on the heap for thread-safety purposes.
 892      */
 893     if (ompi_mtl_ofi.ofi_progress_event_count > MTL_OFI_MAX_PROG_EVENT_COUNT) {
 894         ompi_mtl_ofi.ofi_progress_event_count = MTL_OFI_MAX_PROG_EVENT_COUNT;
 895      }
 896 
 897     /**
 898      * Create a transport level communication endpoint.  To use the endpoint,
 899      * it must be bound to the resources consumed by it such as address
 900      * vectors, completion counters or event queues etc, and enabled.
 901      * See man fi_endpoint for more details.
 902      */
 903 
 904     /* use the universe size as a rough guess on the address vector
 905      * size hint that should be passed to fi_av_open().  For regular
 906      * endpoints, the count will be the universe size.  For scalable
 907      * endpoints, the count will be the universe size multiplied by
 908      * the number of contexts.  In either case, if the universe grows
 909      * (via dynamic processes), the count is a hint, not a hard limit,
 910      * so libfabric will just be slightly less efficient.
 911      */
 912     univ_size_str = getenv("OMPI_UNIVERSE_SIZE");
 913     if (NULL == univ_size_str ||
 914         (universe_size = strtol(univ_size_str, NULL, 0)) <= 0) {
 915         universe_size = ompi_proc_world_size();
 916     }
 917 
 918     if (1 == ompi_mtl_ofi.enable_sep) {
 919         max_ofi_ctxts = (prov->domain_attr->max_ep_tx_ctx <
 920                          prov->domain_attr->max_ep_rx_ctx) ?
 921                          prov->domain_attr->max_ep_tx_ctx :
 922                          prov->domain_attr->max_ep_rx_ctx;
 923 
 924         num_local_ranks = 1 + ompi_process_info.num_local_peers;
 925         if (max_ofi_ctxts <= num_local_ranks) {
 926             opal_show_help("help-mtl-ofi.txt", "Local ranks exceed ofi contexts",
 927                            true, prov->fabric_attr->prov_name,
 928                            ompi_process_info.nodename, __FILE__, __LINE__);
 929             goto error;
 930         }
 931 
 932         /* Provision enough contexts to service all ranks in a node */
 933         max_ofi_ctxts /= num_local_ranks;
 934 
 935         /*
 936          *  If num ctxts user specified is more than max allowed, limit to max
 937          *  and start round-robining. Print warning to user.
 938          */
 939         if (max_ofi_ctxts < ompi_mtl_ofi.num_ofi_contexts) {
 940             opal_show_help("help-mtl-ofi.txt", "Ctxts exceeded available",
 941                            true, max_ofi_ctxts,
 942                            ompi_process_info.nodename, __FILE__, __LINE__);
 943             ompi_mtl_ofi.num_ofi_contexts = max_ofi_ctxts;
 944         }
 945 
 946         ret = ompi_mtl_ofi_init_sep(prov, universe_size);
 947     } else {
 948         ret = ompi_mtl_ofi_init_regular_ep(prov, universe_size);
 949     }
 950 
 951     if (OMPI_SUCCESS != ret) {
 952         goto error;
 953     }
 954 
 955     ompi_mtl_ofi.total_ctxts_used = 0;
 956     ompi_mtl_ofi.threshold_comm_context_id = 0;
 957 
 958     /* Enable Endpoint for communication */
 959     ret = fi_enable(ompi_mtl_ofi.sep);
 960     if (0 != ret) {
 961         MTL_OFI_LOG_FI_ERR(ret, "fi_enable failed");
 962         goto error;
 963     }
 964 
 965     ompi_mtl_ofi.provider_name = strdup(prov->fabric_attr->prov_name);
 966 
 967     /**
 968      * Free providers info since it's not needed anymore.
 969      */
 970     fi_freeinfo(hints);
 971     hints = NULL;
 972     fi_freeinfo(providers);
 973     providers = NULL;
 974 
 975     /**
 976      * Get our address and publish it with modex.
 977      */
 978     namelen = sizeof(ep_name);
 979     ret = fi_getname((fid_t)ompi_mtl_ofi.sep,
 980                      &ep_name[0],
 981                      &namelen);
 982     if (ret) {
 983         MTL_OFI_LOG_FI_ERR(ret, "fi_getname failed");
 984         goto error;
 985     }
 986 
 987     OFI_COMPAT_MODEX_SEND(ret,
 988                           &mca_mtl_ofi_component.super.mtl_version,
 989                           &ep_name,
 990                           namelen);
 991     if (OMPI_SUCCESS != ret) {
 992         opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 993                             "%s:%d: modex_send failed: %d\n",
 994                             __FILE__, __LINE__, ret);
 995         goto error;
 996     }
 997 
 998     ompi_mtl_ofi.epnamelen = namelen;
 999 
1000     /**
1001      * Set the ANY_SRC address.
1002      */
1003     ompi_mtl_ofi.any_addr = FI_ADDR_UNSPEC;
1004 
1005     return &ompi_mtl_ofi.base;
1006 
1007 error:
1008     if (providers) {
1009         (void) fi_freeinfo(providers);
1010     }
1011     if (prov_cq_data) {
1012         (void) fi_freeinfo(prov_cq_data);
1013     }
1014     if (hints) {
1015         (void) fi_freeinfo(hints);
1016     }
1017     if (ompi_mtl_ofi.sep) {
1018         (void) fi_close((fid_t)ompi_mtl_ofi.sep);
1019     }
1020     if (ompi_mtl_ofi.av) {
1021         (void) fi_close((fid_t)ompi_mtl_ofi.av);
1022     }
1023     if ((0 == ompi_mtl_ofi.enable_sep) &&
1024         ompi_mtl_ofi.ofi_ctxt != NULL &&
1025          ompi_mtl_ofi.ofi_ctxt[0].cq) {
1026         /* Check if CQ[0] was created for non-SEP case and close if needed */
1027         (void) fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[0].cq);
1028     }
1029     if (ompi_mtl_ofi.domain) {
1030         (void) fi_close((fid_t)ompi_mtl_ofi.domain);
1031     }
1032     if (ompi_mtl_ofi.fabric) {
1033         (void) fi_close((fid_t)ompi_mtl_ofi.fabric);
1034     }
1035     if (ompi_mtl_ofi.comm_to_context) {
1036         free(ompi_mtl_ofi.comm_to_context);
1037     }
1038     if (ompi_mtl_ofi.ofi_ctxt) {
1039         free(ompi_mtl_ofi.ofi_ctxt);
1040     }
1041 
1042     return NULL;
1043 }
1044 
1045 int
1046 ompi_mtl_ofi_finalize(struct mca_mtl_base_module_t *mtl)
1047 {
1048     ssize_t ret;
1049 
1050     opal_progress_unregister(ompi_mtl_ofi_progress_no_inline);
1051 
1052     /* Close all the OFI objects */
1053     if ((ret = fi_close((fid_t)ompi_mtl_ofi.sep))) {
1054         goto finalize_err;
1055     }
1056 
1057     if ((ret = fi_close((fid_t)ompi_mtl_ofi.av))) {
1058         goto finalize_err;
1059     }
1060 
1061     if (0 == ompi_mtl_ofi.enable_sep) {
1062         /*
1063          * CQ[0] is bound to SEP object Nwhen SEP is not supported by a
1064          * provider. OFI spec requires that we close the Endpoint that is bound
1065          * to the CQ before closing the CQ itself. So, for the non-SEP case, we
1066          * handle the closing of CQ[0] here.
1067          */
1068         if ((ret = fi_close((fid_t)ompi_mtl_ofi.ofi_ctxt[0].cq))) {
1069             goto finalize_err;
1070         }
1071     }
1072 
1073     if ((ret = fi_close((fid_t)ompi_mtl_ofi.domain))) {
1074         goto finalize_err;
1075     }
1076 
1077     if ((ret = fi_close((fid_t)ompi_mtl_ofi.fabric))) {
1078         goto finalize_err;
1079     }
1080 
1081     /* Free memory allocated for TX/RX contexts */
1082     free(ompi_mtl_ofi.comm_to_context);
1083     free(ompi_mtl_ofi.ofi_ctxt);
1084 
1085     return OMPI_SUCCESS;
1086 
1087 finalize_err:
1088     opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
1089                    "fi_close",
1090                    ompi_process_info.nodename, __FILE__, __LINE__,
1091                    fi_strerror(-ret), -ret);
1092 
1093     return OMPI_ERROR;
1094 }
1095 
1096 
1097 

/* [<][>][^][v][top][bottom][index][help] */