root/ompi/mca/mtl/psm/mtl_psm.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ompi_mtl_psm_errhandler
  2. ompi_mtl_psm_module_init
  3. ompi_mtl_psm_finalize
  4. ompi_mtl_psm_connect_error_msg
  5. ompi_mtl_psm_add_procs
  6. ompi_mtl_psm_del_procs
  7. ompi_mtl_psm_add_comm
  8. ompi_mtl_psm_del_comm
  9. ompi_mtl_psm_progress

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2010 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2006 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2006      QLogic Corporation. All rights reserved.
  14  * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
  15  * Copyright (c) 2014      Los Alamos National Security, LLC. All rights
  16  *                         reserved.
  17  * Copyright (c) 2016      Research Organization for Information Science
  18  *                         and Technology (RIST). All rights reserved.
  19  * $COPYRIGHT$
  20  *
  21  * Additional copyrights may follow
  22  *
  23  * $HEADER$
  24  */
  25 
  26 #include "ompi_config.h"
  27 
  28 #include "opal/mca/pmix/pmix.h"
  29 #include "ompi/mca/mtl/mtl.h"
  30 #include "ompi/mca/mtl/base/mtl_base_datatype.h"
  31 #include "opal/util/show_help.h"
  32 #include "ompi/proc/proc.h"
  33 
  34 #include "mtl_psm.h"
  35 #include "mtl_psm_types.h"
  36 #include "mtl_psm_endpoint.h"
  37 #include "mtl_psm_request.h"
  38 
  39 mca_mtl_psm_module_t ompi_mtl_psm = {
  40     .super = {
  41         /* NTH: PSM supports 16 bit context ids */
  42         .mtl_max_contextid = (1UL << 16) - 1,
  43         .mtl_max_tag = (1UL << 30),  /* must allow negatives */
  44 
  45         .mtl_add_procs = ompi_mtl_psm_add_procs,
  46         .mtl_del_procs = ompi_mtl_psm_del_procs,
  47         .mtl_finalize = ompi_mtl_psm_finalize,
  48 
  49         .mtl_send = ompi_mtl_psm_send,
  50         .mtl_isend = ompi_mtl_psm_isend,
  51 
  52         .mtl_irecv = ompi_mtl_psm_irecv,
  53         .mtl_iprobe = ompi_mtl_psm_iprobe,
  54         .mtl_imrecv = ompi_mtl_psm_imrecv,
  55         .mtl_improbe = ompi_mtl_psm_improbe,
  56 
  57         .mtl_cancel = ompi_mtl_psm_cancel,
  58         .mtl_add_comm = ompi_mtl_psm_add_comm,
  59         .mtl_del_comm = ompi_mtl_psm_del_comm
  60     }
  61 };
  62 
  63 static
  64 psm_error_t
  65 ompi_mtl_psm_errhandler(psm_ep_t ep, const psm_error_t error,
  66                         const char *error_string, psm_error_token_t token)
  67 {
  68     switch (error) {
  69         /* We don't want PSM to default to exiting when the following errors occur */
  70         case PSM_EP_DEVICE_FAILURE:
  71         case PSM_EP_NO_DEVICE:
  72         case PSM_EP_NO_PORTS_AVAIL:
  73         case PSM_EP_NO_NETWORK:
  74         case PSM_EP_INVALID_UUID_KEY:
  75           opal_show_help("help-mtl-psm.txt",
  76                          "unable to open endpoint", true,
  77                          psm_error_get_string(error));
  78             break;
  79 
  80         /* We can't handle any other errors than the ones above */
  81         default:
  82             opal_output(0, "Open MPI detected an unexpected PSM error in opening "
  83                         "an endpoint: %s\n", error_string);
  84             return psm_error_defer(token);
  85             break;
  86     }
  87     return error;
  88 }
  89 
  90 int ompi_mtl_psm_progress( void );
  91 
  92 int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) {
  93     psm_error_t err;
  94     psm_ep_t    ep; /* endpoint handle */
  95     psm_mq_t    mq;
  96     psm_epid_t  epid; /* unique lid+port identifier */
  97     psm_uuid_t  unique_job_key;
  98     struct psm_ep_open_opts ep_opt;
  99     unsigned long long *uu = (unsigned long long *) unique_job_key;
 100     char *generated_key;
 101     char env_string[256];
 102     int rc;
 103 
 104     generated_key = getenv(OPAL_MCA_PREFIX"orte_precondition_transports");
 105     memset(uu, 0, sizeof(psm_uuid_t));
 106 
 107     if (!generated_key || (strlen(generated_key) != 33) ||
 108         sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2)
 109     {
 110       opal_show_help("help-mtl-psm.txt",
 111                      "no uuid present", true,
 112                      generated_key ? "could not be parsed from" :
 113                      "not present in", ompi_process_info.nodename);
 114       return OMPI_ERROR;
 115 
 116     }
 117 
 118     /* Handle our own errors for opening endpoints */
 119     psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);
 120 
 121     /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware
 122      * contexts correctly.
 123      */
 124     snprintf(env_string, sizeof(env_string), "%d", local_rank);
 125     setenv("MPI_LOCALRANKID", env_string, 0);
 126     snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
 127     setenv("MPI_LOCALNRANKS", env_string, 0);
 128 
 129     /* Setup the endpoint options. */
 130     bzero((void*) &ep_opt, sizeof(ep_opt));
 131     ep_opt.timeout = ompi_mtl_psm.connect_timeout * 1e9;
 132     ep_opt.unit = ompi_mtl_psm.ib_unit;
 133     ep_opt.affinity = PSM_EP_OPEN_AFFINITY_SKIP; /* do not let PSM set affinity */
 134     ep_opt.shm_mbytes = -1; /* Choose PSM defaults */
 135     ep_opt.sendbufs_num = -1; /* Choose PSM defaults */
 136 
 137 #if PSM_VERNO >= 0x0101
 138     ep_opt.network_pkey = ompi_mtl_psm.ib_pkey;
 139 #endif
 140 
 141 #if PSM_VERNO >= 0x0107
 142     ep_opt.port = ompi_mtl_psm.ib_port;
 143     ep_opt.outsl = ompi_mtl_psm.ib_service_level;
 144 #endif
 145 
 146 #if PSM_VERNO >= 0x010d
 147     ep_opt.service_id = ompi_mtl_psm.ib_service_id;
 148     ep_opt.path_res_type = ompi_mtl_psm.path_res_type;
 149 #endif
 150 
 151     /* Open PSM endpoint */
 152     err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid);
 153     if (err) {
 154       opal_show_help("help-mtl-psm.txt",
 155                      "unable to open endpoint", true,
 156                      psm_error_get_string(err));
 157       return OMPI_ERROR;
 158     }
 159 
 160     /* Future errors are handled by the default error handler */
 161     psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
 162 
 163     err = psm_mq_init(ep,
 164                       0xffff000000000000ULL,
 165                       NULL,
 166                       0,
 167                       &mq);
 168     if (err) {
 169       opal_show_help("help-mtl-psm.txt",
 170                      "psm init", true,
 171                      psm_error_get_string(err));
 172       return OMPI_ERROR;
 173     }
 174 
 175     ompi_mtl_psm.ep   = ep;
 176     ompi_mtl_psm.epid = epid;
 177     ompi_mtl_psm.mq   = mq;
 178 
 179     OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
 180                     &mca_mtl_psm_component.super.mtl_version,
 181                     &ompi_mtl_psm.epid,
 182                     sizeof(psm_epid_t));
 183 
 184     if (OMPI_SUCCESS != rc) {
 185         opal_output(0, "Open MPI couldn't send PSM epid to head node process");
 186         return OMPI_ERROR;
 187     }
 188 
 189     /* register the psm progress function */
 190     opal_progress_register(ompi_mtl_psm_progress);
 191 
 192     return OMPI_SUCCESS;
 193 }
 194 
 195 int
 196 ompi_mtl_psm_finalize(struct mca_mtl_base_module_t* mtl) {
 197     psm_error_t err;
 198 
 199     opal_progress_unregister(ompi_mtl_psm_progress);
 200 
 201     /* free resources */
 202     err = psm_mq_finalize(ompi_mtl_psm.mq);
 203     if (err) {
 204         opal_output(0, "Error in psm_mq_finalize (error %s)\n",
 205                     psm_error_get_string(err));
 206         return OMPI_ERROR;
 207     }
 208 
 209     err = psm_ep_close(ompi_mtl_psm.ep, PSM_EP_CLOSE_GRACEFUL, 1*1e9);
 210     if (err) {
 211         opal_output(0, "Error in psm_ep_close (error %s)\n",
 212                     psm_error_get_string(err));
 213         return OMPI_ERROR;
 214     }
 215 
 216     err = psm_finalize();
 217     if (err) {
 218         opal_output(0, "Error in psm_finalize (error %s)\n",
 219                     psm_error_get_string(err));
 220         return OMPI_ERROR;
 221     }
 222 
 223     return OMPI_SUCCESS;
 224 }
 225 
 226 static
 227 const char *
 228 ompi_mtl_psm_connect_error_msg(psm_error_t err)
 229 {
 230     switch (err) { /* See if we expect the error */
 231         case PSM_EPID_UNREACHABLE:
 232         case PSM_EPID_INVALID_NODE:
 233         case PSM_EPID_INVALID_MTU:
 234         case PSM_EPID_INVALID_UUID_KEY:
 235         case PSM_EPID_INVALID_VERSION:
 236         case PSM_EPID_INVALID_CONNECT:
 237             return psm_error_get_string(err);
 238             break;
 239         case PSM_EPID_UNKNOWN:
 240             return "Connect status could not be determined "
 241                    "because of other errors";
 242         default:
 243             return NULL;
 244     }
 245 }
 246 
 247 #ifndef min
 248 #  define min(a,b) ((a) < (b) ? (a) : (b))
 249 #endif
 250 
 251 #ifndef max
 252 #  define max(a,b) ((a) > (b) ? (a) : (b))
 253 #endif
 254 
 255 int
 256 ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
 257                       size_t nprocs,
 258                       struct ompi_proc_t** procs)
 259 {
 260     int i,j;
 261     int rc;
 262     psm_epid_t   *epids_in = NULL;
 263     int *mask_in = NULL;
 264     psm_epid_t   *epid;
 265     psm_epaddr_t *epaddrs_out = NULL;
 266     psm_error_t  *errs_out = NULL, err;
 267     size_t size;
 268     int proc_errors[PSM_ERROR_LAST] = { 0 };
 269     int timeout_in_secs;
 270 
 271     assert(mtl == &ompi_mtl_psm.super);
 272     rc = OMPI_ERR_OUT_OF_RESOURCE;
 273 
 274     errs_out = (psm_error_t *) malloc(nprocs * sizeof(psm_error_t));
 275     if (errs_out == NULL) {
 276         goto bail;
 277     }
 278     epids_in = (psm_epid_t *) malloc(nprocs * sizeof(psm_epid_t));
 279     if (epids_in == NULL) {
 280         goto bail;
 281     }
 282     mask_in = (int *) malloc(nprocs * sizeof(int));
 283     if (mask_in == NULL) {
 284         goto bail;
 285     }
 286     epaddrs_out = (psm_epaddr_t *) malloc(nprocs * sizeof(psm_epaddr_t));
 287     if (epaddrs_out == NULL) {
 288         goto bail;
 289     }
 290     rc = OMPI_SUCCESS;
 291 
 292     /* Get the epids for all the processes from modex */
 293     for (i = 0; i < (int) nprocs; i++) {
 294         if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
 295             /* Already connected: don't connect again */
 296             mask_in[i] = 0;
 297             continue;
 298         }
 299 
 300         OPAL_MODEX_RECV(rc, &mca_mtl_psm_component.super.mtl_version,
 301                         &procs[i]->super.proc_name, (void**)&epid, &size);
 302         if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t)) {
 303           rc = OMPI_ERROR;
 304           goto bail;
 305         }
 306         epids_in[i] = *epid;
 307         mask_in[i] = 1;
 308     }
 309 
 310     timeout_in_secs = max(ompi_mtl_psm.connect_timeout, 0.5 * nprocs);
 311 
 312     psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_NOP);
 313 
 314     err = psm_ep_connect(ompi_mtl_psm.ep,
 315                          nprocs,
 316                          epids_in,
 317                          mask_in,
 318                          errs_out,
 319                          epaddrs_out,
 320                          timeout_in_secs * 1e9);
 321     if (err) {
 322         char *errstr = (char *) ompi_mtl_psm_connect_error_msg(err);
 323         if (errstr == NULL) {
 324             opal_output(0, "PSM returned unhandled/unknown connect error: %s\n",
 325                         psm_error_get_string(err));
 326         }
 327         for (i = 0; i < (int) nprocs; i++) {
 328             if (0 == mask_in[i]) {
 329                     continue;
 330             }
 331 
 332             psm_error_t thiserr = errs_out[i];
 333             errstr = (char *) ompi_mtl_psm_connect_error_msg(thiserr);
 334             if (proc_errors[thiserr] == 0) {
 335                 proc_errors[thiserr] = 1;
 336                 opal_output(0, "PSM EP connect error (%s):",
 337                             errstr ? errstr : "unknown connect error");
 338                 for (j = 0; j < (int) nprocs; j++) {
 339                   if (errs_out[j] == thiserr) {
 340                       opal_output(0, " %s", (NULL == procs[j]->super.proc_hostname) ?
 341                                   "unknown" : procs[j]->super.proc_hostname);
 342                   }
 343                 }
 344                 opal_output(0, "\n");
 345             }
 346         }
 347 
 348         rc = OMPI_ERROR;
 349     }
 350     else {
 351         /* Default error handling is enabled, errors will not be returned to
 352          * user.  PSM prints the error and the offending endpoint's hostname
 353          * and exits with -1 */
 354         psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
 355 
 356         /* Fill in endpoint data */
 357         for (i = 0; i < (int) nprocs; i++) {
 358             if (0 == mask_in[i]) {
 359                     continue;
 360             }
 361 
 362             mca_mtl_psm_endpoint_t *endpoint =
 363                 (mca_mtl_psm_endpoint_t *) OBJ_NEW(mca_mtl_psm_endpoint_t);
 364             endpoint->peer_epid = epids_in[i];
 365             endpoint->peer_addr = epaddrs_out[i];
 366             procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
 367         }
 368 
 369         rc = OMPI_SUCCESS;
 370     }
 371 
 372 bail:
 373     if (epids_in != NULL) {
 374         free(epids_in);
 375     }
 376     if (mask_in != NULL) {
 377         free(mask_in);
 378     }
 379     if (errs_out != NULL) {
 380         free(errs_out);
 381     }
 382     if (epaddrs_out != NULL) {
 383         free(epaddrs_out);
 384     }
 385 
 386     return rc;
 387 }
 388 
 389 int
 390 ompi_mtl_psm_del_procs(struct mca_mtl_base_module_t *mtl,
 391                       size_t nprocs,
 392                       struct ompi_proc_t** procs)
 393 {
 394     return OMPI_SUCCESS;
 395 }
 396 
 397 
 398 int
 399 ompi_mtl_psm_add_comm(struct mca_mtl_base_module_t *mtl,
 400                       struct ompi_communicator_t *comm)
 401 {
 402     return OMPI_SUCCESS;
 403 }
 404 
 405 
 406 int
 407 ompi_mtl_psm_del_comm(struct mca_mtl_base_module_t *mtl,
 408                       struct ompi_communicator_t *comm)
 409 {
 410     return OMPI_SUCCESS;
 411 }
 412 
 413 
 414 int ompi_mtl_psm_progress( void ) {
 415     psm_error_t err;
 416     mca_mtl_psm_request_t* mtl_psm_request;
 417     psm_mq_status_t psm_status;
 418     psm_mq_req_t req;
 419     int completed = 1;
 420 
 421     do {
 422         err = psm_mq_ipeek(ompi_mtl_psm.mq, &req, NULL);
 423         if (err == PSM_MQ_INCOMPLETE) {
 424             return completed;
 425         } else if (err != PSM_OK) {
 426             goto error;
 427         }
 428 
 429         completed++;
 430 
 431         err = psm_mq_test(&req, &psm_status);
 432         if (err != PSM_OK) {
 433             goto error;
 434         }
 435 
 436         mtl_psm_request = (mca_mtl_psm_request_t*) psm_status.context;
 437 
 438         if (mtl_psm_request->type == OMPI_MTL_PSM_IRECV) {
 439             ompi_mtl_datatype_unpack(mtl_psm_request->convertor,
 440                                      mtl_psm_request->buf,
 441                                      psm_status.msg_length);
 442 
 443             mtl_psm_request->super.ompi_req->req_status.MPI_SOURCE =
 444                     PSM_GET_MQRANK(psm_status.msg_tag);
 445             mtl_psm_request->super.ompi_req->req_status.MPI_TAG =
 446                     PSM_GET_MQUTAG(psm_status.msg_tag);
 447         mtl_psm_request->super.ompi_req->req_status._ucount =
 448             psm_status.nbytes;
 449         }
 450 
 451         if(mtl_psm_request->type == OMPI_MTL_PSM_ISEND) {
 452           if (mtl_psm_request->free_after) {
 453             free(mtl_psm_request->buf);
 454           }
 455         }
 456 
 457         switch (psm_status.error_code) {
 458             case PSM_OK:
 459                 mtl_psm_request->super.ompi_req->req_status.MPI_ERROR =
 460                     OMPI_SUCCESS;
 461                 break;
 462             case PSM_MQ_TRUNCATION:
 463                 mtl_psm_request->super.ompi_req->req_status.MPI_ERROR =
 464                     MPI_ERR_TRUNCATE;
 465                 break;
 466             default:
 467                 mtl_psm_request->super.ompi_req->req_status.MPI_ERROR =
 468                         MPI_ERR_INTERN;
 469         }
 470 
 471         mtl_psm_request->super.completion_callback(&mtl_psm_request->super);
 472 
 473     }
 474     while (1);
 475 
 476  error:
 477     opal_show_help("help-mtl-psm.txt",
 478                    "error polling network", true,
 479                    psm_error_get_string(err));
 480     return 1;
 481 }
 482 

/* [<][>][^][v][top][bottom][index][help] */