root/ompi/mca/mtl/ofi/mtl_ofi.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ompi_mtl_ofi_add_procs
  2. ompi_mtl_ofi_del_procs

   1 /*
   2  * Copyright (c) 2013-2018 Intel, Inc. All rights reserved
   3  *
   4  * $COPYRIGHT$
   5  *
   6  * Additional copyrights may follow
   7  *
   8  * $HEADER$
   9  */
  10 
  11 #include "mtl_ofi.h"
  12 
  13 OMPI_DECLSPEC extern mca_mtl_ofi_component_t mca_mtl_ofi_component;
  14 
  15 mca_mtl_ofi_module_t ompi_mtl_ofi = {
  16     {
  17         (int)((1ULL << MTL_OFI_CID_BIT_COUNT_1) - 1), /* max cid */
  18         (int)((1ULL << (MTL_OFI_TAG_BIT_COUNT_1 - 1)) - 1) ,/* max tag value */
  19         0,           /* request reserve space */
  20         0,           /* flags */
  21 
  22         ompi_mtl_ofi_add_procs,
  23         ompi_mtl_ofi_del_procs,
  24         ompi_mtl_ofi_finalize,
  25 
  26          NULL,
  27          NULL,
  28          NULL,
  29          NULL,
  30          ompi_mtl_ofi_imrecv,
  31          NULL,
  32 
  33         ompi_mtl_ofi_cancel,
  34         ompi_mtl_ofi_add_comm,
  35         ompi_mtl_ofi_del_comm
  36     },
  37     0,
  38     0,
  39     NULL,
  40     NULL
  41 };
  42 
  43 int
  44 ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl,
  45                        size_t nprocs,
  46                        struct ompi_proc_t** procs)
  47 {
  48     int ret = OMPI_SUCCESS;
  49     size_t i;
  50     size_t size;
  51     size_t namelen;
  52     int count = 0;
  53     char *ep_name = NULL;
  54     char *ep_names = NULL;
  55     fi_addr_t *fi_addrs = NULL;
  56     mca_mtl_ofi_endpoint_t *endpoint = NULL;
  57     int num_peers_limit = (1 << ompi_mtl_ofi.num_bits_source_rank) - 1;
  58 
  59     namelen = ompi_mtl_ofi.epnamelen;
  60 
  61     /* We cannot add more ranks than available tag bits */
  62     if ((false == ompi_mtl_ofi.fi_cq_data) &&
  63         OPAL_UNLIKELY(((int) (nprocs + ompi_mtl_ofi.num_peers) > num_peers_limit))) {
  64         opal_output(0, "%s:%d: OFI provider: %s does not have enough bits for source rank in its tag.\n"
  65                        "Adding more ranks will result in undefined behaviour. Please enable\n"
  66                        "FI_REMOTE_CQ_DATA feature in the provider. For more info refer fi_cq(3).\n",
  67                        __FILE__, __LINE__, ompi_mtl_ofi.provider_name);
  68         fflush(stderr);
  69         ret = OMPI_ERROR;
  70         goto bail;
  71     }
  72 
  73     /**
  74      * Create array of EP names.
  75      */
  76     ep_names = malloc(nprocs * namelen);
  77     if (NULL == ep_names) {
  78         ret = OMPI_ERROR;
  79         goto bail;
  80     }
  81 
  82     /**
  83      * Create array of fi_addrs.
  84      */
  85     fi_addrs = malloc(nprocs * sizeof(fi_addr_t));
  86     if (NULL == fi_addrs) {
  87         ret = OMPI_ERROR;
  88         goto bail;
  89     }
  90 
  91     /**
  92      * Retrieve the processes' EP names from modex.
  93      */
  94     for (i = 0; i < nprocs; ++i) {
  95         OFI_COMPAT_MODEX_RECV(ret,
  96                               &mca_mtl_ofi_component.super.mtl_version,
  97                               procs[i],
  98                               (void**)&ep_name,
  99                               &size);
 100         if (OMPI_SUCCESS != ret) {
 101             opal_show_help("help-mtl-ofi.txt", "modex failed",
 102                            true, ompi_process_info.nodename,
 103                            procs[i]->super.proc_hostname,
 104                            opal_strerror(ret), ret);
 105             goto bail;
 106         }
 107         memcpy(&ep_names[i*namelen], ep_name, namelen);
 108     }
 109 
 110     /**
 111      * Map the EP names to fi_addrs.
 112      */
 113     count = fi_av_insert(ompi_mtl_ofi.av, ep_names, nprocs, fi_addrs, 0, NULL);
 114     if ((count < 0) || (nprocs != (size_t)count)) {
 115         opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 116                             "%s:%d: fi_av_insert failed: %d\n",
 117                             __FILE__, __LINE__, count);
 118         ret = OMPI_ERROR;
 119         goto bail;
 120     }
 121 
 122     /**
 123      * Store the fi_addrs within the endpoint objects.
 124      */
 125     for (i = 0; i < nprocs; ++i) {
 126         endpoint = OBJ_NEW(mca_mtl_ofi_endpoint_t);
 127         if (NULL == endpoint) {
 128             opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 129                                 "%s:%d: mtl/ofi: could not allocate endpoint"
 130                                 " structure\n",
 131                                 __FILE__, __LINE__);
 132             ret = OMPI_ERROR;
 133             goto bail;
 134         }
 135 
 136         endpoint->mtl_ofi_module = &ompi_mtl_ofi;
 137         endpoint->peer_fiaddr = fi_addrs[i];
 138 
 139         /* FIXME: What happens if this endpoint already exists? */
 140         procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
 141     }
 142 
 143     /* Update global counter of number of procs added to this rank */
 144     ompi_mtl_ofi.num_peers += nprocs;
 145 
 146     ret = OMPI_SUCCESS;
 147 
 148 bail:
 149     if (fi_addrs)
 150         free(fi_addrs);
 151 
 152     if (ep_names)
 153         free(ep_names);
 154 
 155     return ret;
 156 }
 157 
 158 int
 159 ompi_mtl_ofi_del_procs(struct mca_mtl_base_module_t *mtl,
 160                        size_t nprocs,
 161                        struct ompi_proc_t** procs)
 162 {
 163     int ret;
 164     size_t i;
 165     mca_mtl_ofi_endpoint_t *endpoint = NULL;
 166 
 167     for (i = 0 ; i < nprocs ; ++i) {
 168         if (NULL != procs[i] &&
 169             NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
 170             endpoint = procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
 171             ret = fi_av_remove(ompi_mtl_ofi.av, &endpoint->peer_fiaddr, 1, 0);
 172             if (ret) {
 173                 opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
 174                         "%s:%d: fi_av_remove failed: %s\n", __FILE__, __LINE__, fi_strerror(errno));
 175                 return ret;
 176             }
 177             procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = NULL;
 178             OBJ_RELEASE(endpoint);
 179         }
 180     }
 181 
 182     return OMPI_SUCCESS;
 183 }

/* [<][>][^][v][top][bottom][index][help] */