root/ompi/mca/mtl/psm2/mtl_psm2.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ompi_mtl_psm2_errhandler
  2. ompi_mtl_psm2_module_init
  3. ompi_mtl_psm2_finalize
  4. ompi_mtl_psm2_connect_error_msg
  5. ompi_mtl_psm2_add_procs
  6. ompi_mtl_psm2_del_procs
  7. ompi_mtl_psm2_add_comm
  8. ompi_mtl_psm2_del_comm
  9. ompi_mtl_psm2_progress

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2010 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2006 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2006      QLogic Corporation. All rights reserved.
  14  * Copyright (c) 2013-2017 Intel, Inc. All rights reserved
  15  * Copyright (c) 2014      Los Alamos National Security, LLC. All rights
  16  *                         reserved.
  17  * Copyright (c) 2016      Research Organization for Information Science
  18  *                         and Technology (RIST). All rights reserved.
  19  * $COPYRIGHT$
  20  *
  21  * Additional copyrights may follow
  22  *
  23  * $HEADER$
  24  */
  25 
  26 #include "ompi_config.h"
  27 
  28 #include "opal/mca/pmix/pmix.h"
  29 #include "ompi/mca/mtl/mtl.h"
  30 #include "ompi/mca/mtl/base/mtl_base_datatype.h"
  31 #include "opal/util/show_help.h"
  32 #include "ompi/proc/proc.h"
  33 
  34 #include "mtl_psm2.h"
  35 #include "mtl_psm2_types.h"
  36 #include "mtl_psm2_endpoint.h"
  37 #include "mtl_psm2_request.h"
  38 
  39 mca_mtl_psm2_module_t ompi_mtl_psm2 = {
  40     .super = {
  41         /* NTH: PSM2 supports 16 bit context ids */
  42         .mtl_max_contextid = (1UL << 16) - 1,
  43         .mtl_max_tag = (1UL << 30),  /* must allow negatives */
  44 
  45         .mtl_add_procs = ompi_mtl_psm2_add_procs,
  46         .mtl_del_procs = ompi_mtl_psm2_del_procs,
  47         .mtl_finalize = ompi_mtl_psm2_finalize,
  48 
  49         .mtl_send = ompi_mtl_psm2_send,
  50         .mtl_isend = ompi_mtl_psm2_isend,
  51 
  52         .mtl_irecv = ompi_mtl_psm2_irecv,
  53         .mtl_iprobe = ompi_mtl_psm2_iprobe,
  54         .mtl_imrecv = ompi_mtl_psm2_imrecv,
  55         .mtl_improbe = ompi_mtl_psm2_improbe,
  56 
  57         .mtl_cancel = ompi_mtl_psm2_cancel,
  58         .mtl_add_comm = ompi_mtl_psm2_add_comm,
  59         .mtl_del_comm = ompi_mtl_psm2_del_comm
  60     }
  61 };
  62 
  63 static
  64 psm2_error_t
  65 ompi_mtl_psm2_errhandler(psm2_ep_t ep, const psm2_error_t error,
  66                         const char *error_string, psm2_error_token_t token)
  67 {
  68     switch (error) {
  69         /* We don't want PSM2 to default to exiting when the following errors occur */
  70         case PSM2_EP_DEVICE_FAILURE:
  71         case PSM2_EP_NO_DEVICE:
  72         case PSM2_EP_NO_PORTS_AVAIL:
  73         case PSM2_EP_NO_NETWORK:
  74         case PSM2_EP_INVALID_UUID_KEY:
  75           opal_show_help("help-mtl-psm2.txt",
  76                          "unable to open endpoint", true,
  77                          psm2_error_get_string(error));
  78             break;
  79 
  80         /* We can't handle any other errors than the ones above */
  81         default:
  82             opal_output(0, "Open MPI detected an unexpected PSM2 error in opening "
  83                         "an endpoint: %s\n", error_string);
  84             return psm2_error_defer(token);
  85             break;
  86     }
  87     return error;
  88 }
  89 
  90 int ompi_mtl_psm2_progress( void );
  91 
  92 int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
  93     psm2_error_t err;
  94     psm2_ep_t   ep; /* endpoint handle */
  95     psm2_mq_t   mq;
  96     psm2_epid_t epid; /* unique lid+port identifier */
  97     psm2_uuid_t  unique_job_key;
  98     struct psm2_ep_open_opts ep_opt;
  99     unsigned long long *uu = (unsigned long long *) unique_job_key;
 100     char *generated_key;
 101     char env_string[256];
 102     int rc;
 103 
 104     generated_key = getenv(OPAL_MCA_PREFIX"orte_precondition_transports");
 105     memset(uu, 0, sizeof(psm2_uuid_t));
 106 
 107     if (!generated_key || (strlen(generated_key) != 33) ||
 108         sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2)
 109     {
 110       opal_show_help("help-mtl-psm2.txt",
 111                      "no uuid present", true,
 112                      generated_key ? "could not be parsed from" :
 113                      "not present in", ompi_process_info.nodename);
 114       return OMPI_ERROR;
 115 
 116     }
 117 
 118     /* Handle our own errors for opening endpoints */
 119     psm2_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler);
 120 
 121     /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM2 can allocate hardware
 122      * contexts correctly.
 123      */
 124     snprintf(env_string, sizeof(env_string), "%d", local_rank);
 125     setenv("MPI_LOCALRANKID", env_string, 0);
 126     snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
 127     setenv("MPI_LOCALNRANKS", env_string, 0);
 128 
 129     /* Setup the endpoint options. */
 130     psm2_ep_open_opts_get_defaults(&ep_opt);
 131     ep_opt.timeout = ompi_mtl_psm2.connect_timeout * 1e9;
 132     ep_opt.affinity = PSM2_EP_OPEN_AFFINITY_SKIP; /* do not let PSM2 set affinity */
 133 
 134     /* Open PSM2 endpoint */
 135     err = psm2_ep_open(unique_job_key, &ep_opt, &ep, &epid);
 136     if (err) {
 137       opal_show_help("help-mtl-psm2.txt",
 138                      "unable to open endpoint", true,
 139                      psm2_error_get_string(err));
 140       return OMPI_ERROR;
 141     }
 142 
 143     /* Future errors are handled by the default error handler */
 144     psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT);
 145 
 146     err = psm2_mq_init(ep,
 147                       0xffff000000000000ULL,
 148                       NULL,
 149                       0,
 150                       &mq);
 151     if (err) {
 152       opal_show_help("help-mtl-psm2.txt",
 153                      "psm2 init", true,
 154                      psm2_error_get_string(err));
 155       return OMPI_ERROR;
 156     }
 157 
 158     ompi_mtl_psm2.ep   = ep;
 159     ompi_mtl_psm2.epid = epid;
 160     ompi_mtl_psm2.mq   = mq;
 161 
 162     OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
 163                     &mca_mtl_psm2_component.super.mtl_version,
 164                     &ompi_mtl_psm2.epid,
 165                     sizeof(psm2_epid_t));
 166 
 167     if (OMPI_SUCCESS != rc) {
 168         opal_output(0, "Open MPI couldn't send PSM2 epid to head node process");
 169         return OMPI_ERROR;
 170     }
 171 
 172 
 173     /* register the psm2 progress function */
 174     opal_progress_register(ompi_mtl_psm2_progress);
 175 
 176 #if OPAL_CUDA_SUPPORT
 177     ompi_mtl_psm2.super.mtl_flags |= MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE;
 178 #endif
 179 
 180     return OMPI_SUCCESS;
 181 }
 182 
 183 int
 184 ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl) {
 185     psm2_error_t err;
 186 
 187     opal_progress_unregister(ompi_mtl_psm2_progress);
 188 
 189     /* free resources */
 190     err = psm2_mq_finalize(ompi_mtl_psm2.mq);
 191     if (err) {
 192         opal_output(0, "Error in psm2_mq_finalize (error %s)\n",
 193                     psm2_error_get_string(err));
 194         return OMPI_ERROR;
 195     }
 196 
 197     err = psm2_ep_close(ompi_mtl_psm2.ep, PSM2_EP_CLOSE_GRACEFUL, 1*1e9);
 198     if (err) {
 199         opal_output(0, "Error in psm2_ep_close (error %s)\n",
 200                     psm2_error_get_string(err));
 201         return OMPI_ERROR;
 202     }
 203 
 204     err = psm2_finalize();
 205     if (err) {
 206         opal_output(0, "Error in psm2_finalize (error %s)\n",
 207                     psm2_error_get_string(err));
 208         return OMPI_ERROR;
 209     }
 210 
 211     return OMPI_SUCCESS;
 212 }
 213 
 214 static
 215 const char *
 216 ompi_mtl_psm2_connect_error_msg(psm2_error_t err)
 217 {
 218     switch (err) { /* See if we expect the error */
 219         case PSM2_EPID_UNREACHABLE:
 220         case PSM2_EPID_INVALID_NODE:
 221         case PSM2_EPID_INVALID_MTU:
 222         case PSM2_EPID_INVALID_UUID_KEY:
 223         case PSM2_EPID_INVALID_VERSION:
 224         case PSM2_EPID_INVALID_CONNECT:
 225             return psm2_error_get_string(err);
 226             break;
 227         case PSM2_EPID_UNKNOWN:
 228             return "Connect status could not be determined "
 229                    "because of other errors";
 230         default:
 231             return NULL;
 232     }
 233 }
 234 
 235 #ifndef min
 236 #  define min(a,b) ((a) < (b) ? (a) : (b))
 237 #endif
 238 
 239 #ifndef max
 240 #  define max(a,b) ((a) > (b) ? (a) : (b))
 241 #endif
 242 
 243 int
 244 ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl,
 245                       size_t nprocs,
 246                       struct ompi_proc_t** procs)
 247 {
 248     int i,j;
 249     int rc;
 250     psm2_epid_t   *epids_in = NULL;
 251     int *mask_in = NULL;
 252     psm2_epid_t  *epid;
 253     psm2_epaddr_t *epaddrs_out = NULL;
 254     psm2_error_t  *errs_out = NULL, err;
 255     size_t size;
 256     int proc_errors[PSM2_ERROR_LAST] = { 0 };
 257     int timeout_in_secs;
 258 
 259     assert(mtl == &ompi_mtl_psm2.super);
 260     rc = OMPI_ERR_OUT_OF_RESOURCE;
 261 
 262     errs_out = (psm2_error_t *) malloc(nprocs * sizeof(psm2_error_t));
 263     if (errs_out == NULL) {
 264         goto bail;
 265     }
 266     epids_in = (psm2_epid_t *) malloc(nprocs * sizeof(psm2_epid_t));
 267     if (epids_in == NULL) {
 268         goto bail;
 269     }
 270     mask_in = (int *) malloc(nprocs * sizeof(int));
 271     if (mask_in == NULL) {
 272         goto bail;
 273     }
 274     epaddrs_out = (psm2_epaddr_t *) malloc(nprocs * sizeof(psm2_epaddr_t));
 275     if (epaddrs_out == NULL) {
 276         goto bail;
 277     }
 278     rc = OMPI_SUCCESS;
 279 
 280     /* Get the epids for all the processes from modex */
 281     for (i = 0; i < (int) nprocs; i++) {
 282         if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
 283             /* Already connected: don't connect again */
 284             mask_in[i] = 0;
 285             continue;
 286         }
 287 
 288         OPAL_MODEX_RECV(rc, &mca_mtl_psm2_component.super.mtl_version,
 289                         &procs[i]->super.proc_name, (void**)&epid, &size);
 290         if (rc != OMPI_SUCCESS || size != sizeof(psm2_epid_t)) {
 291           return OMPI_ERROR;
 292         }
 293         epids_in[i] = *epid;
 294         mask_in[i] = 1;
 295     }
 296 
 297     timeout_in_secs = max(ompi_mtl_psm2.connect_timeout, 0.5 * nprocs);
 298 
 299     psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_NOP);
 300 
 301     err = psm2_ep_connect(ompi_mtl_psm2.ep,
 302                          nprocs,
 303                          epids_in,
 304                          mask_in,
 305                          errs_out,
 306                          epaddrs_out,
 307                          timeout_in_secs * 1e9);
 308     if (err) {
 309         char *errstr = (char *) ompi_mtl_psm2_connect_error_msg(err);
 310         if (errstr == NULL) {
 311             opal_output(0, "PSM2 returned unhandled/unknown connect error: %s\n",
 312                         psm2_error_get_string(err));
 313         }
 314         for (i = 0; i < (int) nprocs; i++) {
 315             if (0 == mask_in[i]) {
 316                     continue;
 317             }
 318 
 319             psm2_error_t thiserr = errs_out[i];
 320             errstr = (char *) ompi_mtl_psm2_connect_error_msg(thiserr);
 321             if (proc_errors[thiserr] == 0) {
 322                 proc_errors[thiserr] = 1;
 323                 opal_output(0, "PSM2 EP connect error (%s):",
 324                             errstr ? errstr : "unknown connect error");
 325                 for (j = 0; j < (int) nprocs; j++) {
 326                   if (errs_out[j] == thiserr) {
 327                       opal_output(0, " %s", (NULL == procs[j]->super.proc_hostname) ?
 328                                   "unknown" : procs[j]->super.proc_hostname);
 329                   }
 330                 }
 331                 opal_output(0, "\n");
 332             }
 333         }
 334 
 335         rc = OMPI_ERROR;
 336     }
 337     else {
 338         /* Default error handling is enabled, errors will not be returned to
 339          * user.  PSM2 prints the error and the offending endpoint's hostname
 340          * and exits with -1 */
 341         psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT);
 342 
 343         /* Fill in endpoint data */
 344         for (i = 0; i < (int) nprocs; i++) {
 345             if (0 == mask_in[i]) {
 346                     continue;
 347             }
 348 
 349             mca_mtl_psm2_endpoint_t *endpoint =
 350                 (mca_mtl_psm2_endpoint_t *) OBJ_NEW(mca_mtl_psm2_endpoint_t);
 351             endpoint->peer_epid = epids_in[i];
 352             endpoint->peer_addr = epaddrs_out[i];
 353             procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
 354         }
 355 
 356         rc = OMPI_SUCCESS;
 357     }
 358 
 359 bail:
 360     if (epids_in != NULL) {
 361         free(epids_in);
 362     }
 363     if (mask_in != NULL) {
 364         free(mask_in);
 365     }
 366     if (errs_out != NULL) {
 367         free(errs_out);
 368     }
 369     if (epaddrs_out != NULL) {
 370         free(epaddrs_out);
 371     }
 372 
 373     return rc;
 374 }
 375 
 376 int
 377 ompi_mtl_psm2_del_procs(struct mca_mtl_base_module_t *mtl,
 378                       size_t nprocs,
 379                       struct ompi_proc_t** procs)
 380 {
 381     return OMPI_SUCCESS;
 382 }
 383 
 384 
 385 int
 386 ompi_mtl_psm2_add_comm(struct mca_mtl_base_module_t *mtl,
 387                       struct ompi_communicator_t *comm)
 388 {
 389     return OMPI_SUCCESS;
 390 }
 391 
 392 
 393 int
 394 ompi_mtl_psm2_del_comm(struct mca_mtl_base_module_t *mtl,
 395                       struct ompi_communicator_t *comm)
 396 {
 397     return OMPI_SUCCESS;
 398 }
 399 
 400 
 401 int ompi_mtl_psm2_progress( void ) {
 402     psm2_error_t err;
 403     mca_mtl_psm2_request_t* mtl_psm2_request;
 404     psm2_mq_status2_t psm2_status;
 405     psm2_mq_req_t req;
 406     int completed = 1;
 407 
 408     do {
 409         OPAL_THREAD_LOCK(&mtl_psm2_mq_mutex);
 410         err = psm2_mq_ipeek2(ompi_mtl_psm2.mq, &req, NULL);
 411         if (err == PSM2_MQ_INCOMPLETE) {
 412             OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
 413             return completed;
 414         } else if (OPAL_UNLIKELY(err != PSM2_OK)) {
 415             OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
 416             goto error;
 417         }
 418 
 419         err = psm2_mq_test2(&req, &psm2_status);
 420         OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
 421 
 422         if (OPAL_UNLIKELY (err != PSM2_OK)) {
 423             goto error;
 424         }
 425 
 426         completed++;
 427 
 428         mtl_psm2_request = (mca_mtl_psm2_request_t*) psm2_status.context;
 429 
 430         if (mtl_psm2_request->type == OMPI_mtl_psm2_IRECV) {
 431 
 432             mtl_psm2_request->super.ompi_req->req_status.MPI_SOURCE =
 433                 psm2_status.msg_tag.tag1;
 434             mtl_psm2_request->super.ompi_req->req_status.MPI_TAG =
 435                 psm2_status.msg_tag.tag0;
 436             mtl_psm2_request->super.ompi_req->req_status._ucount =
 437                 psm2_status.nbytes;
 438 
 439             ompi_mtl_datatype_unpack(mtl_psm2_request->convertor,
 440                 mtl_psm2_request->buf,
 441                 psm2_status.msg_length);
 442         }
 443 
 444         if(mtl_psm2_request->type == OMPI_mtl_psm2_ISEND) {
 445           if (mtl_psm2_request->free_after) {
 446             free(mtl_psm2_request->buf);
 447           }
 448         }
 449 
 450         switch (psm2_status.error_code) {
 451             case PSM2_OK:
 452             mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
 453                 OMPI_SUCCESS;
 454             break;
 455             case PSM2_MQ_TRUNCATION:
 456             mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
 457                 MPI_ERR_TRUNCATE;
 458             break;
 459             default:
 460             mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
 461                 MPI_ERR_INTERN;
 462         }
 463 
 464         mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super);
 465     }
 466     while (1);
 467 
 468  error:
 469     opal_show_help("help-mtl-psm2.txt",
 470                    "error polling network", true,
 471                    psm2_error_get_string(err));
 472     return 1;
 473 }

/* [<][>][^][v][top][bottom][index][help] */