root/orte/mca/odls/alps/odls_alps_utils.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. orte_odls_alps_get_rdma_creds

   1 /*
   2  * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2006 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2014      Los Alamos National Security, LLC.  All rights
  13  *                         reserved.
  14  * $COPYRIGHT$
  15  *
  16  * Additional copyrights may follow
  17  *
  18  * $HEADER$
  19  *
  20  * These symbols are in a file by themselves to provide nice linker
  21  * semantics.  Since linkers generally pull in symbols by object
  22  * files, keeping these symbols as the only symbols in this file
  23  * prevents utility programs such as "ompi_info" from having to import
  24  * entire components just to query their version and parameters.
  25  */
  26 
  27 #include "orte_config.h"
  28 #include "orte/constants.h"
  29 
  30 #include <stdlib.h>
  31 #ifdef HAVE_UNISTD_H
  32 #include <unistd.h>
  33 #endif
  34 #include <ctype.h>
  35 #include <sys/syscall.h>
  36 
  37 #include "orte/mca/mca.h"
  38 #include "opal/mca/base/base.h"
  39 #include "opal/util/opal_environ.h"
  40 
  41 #include "orte/mca/odls/odls.h"
  42 #include "orte/mca/odls/base/base.h"
  43 #include "orte/mca/odls/base/odls_private.h"
  44 #include "orte/mca/odls/alps/odls_alps.h"
  45 
  46 int orte_odls_alps_get_rdma_creds(void)
  47 {
  48     int alps_status = 0, num_creds, i, len;
  49     uint64_t apid;
  50     size_t alps_count;
  51     int ret = ORTE_SUCCESS;
  52     alpsAppLLIGni_t *rdmacred_rsp=NULL;
  53     alpsAppGni_t *rdmacred_buf;
  54     char *ptr;
  55     char env_buffer[1024];
  56     static int already_got_creds = 0;
  57 
  58     /*
  59      * If we already put the GNI RDMA credentials into orte_launch_environ,
  60      * no need to do anything.
  61      * TODO: kind of ugly, need to implement an opal_getenv
  62      */
  63 
  64     if (1 == already_got_creds) {
  65         return ORTE_SUCCESS;
  66     }
  67 
  68     /*
  69      * get the Cray HSN RDMA credentials here and stuff them in to the
  70      * PMI env variable format expected by uGNI consumers like the uGNI
  71      * BTL, etc. Stuff into the orte_launch_environ to make sure the
  72      * application processes can actually use the HSN API (uGNI).
  73      */
  74 
  75     if (ORTE_PROC_IS_DAEMON) {
  76 
  77         ret = alps_app_lli_lock();
  78 
  79         /*
  80          * First get our apid
  81          */
  82 
  83         ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_APID, NULL, 0);
  84         if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
  85             OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
  86                                  "%s odls:alps: alps_app_lli_put_request returned %d",
  87                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
  88             ret = ORTE_ERR_FILE_WRITE_FAILURE;
  89             goto fn_exit;
  90         }
  91 
  92         ret = alps_app_lli_get_response (&alps_status, &alps_count);
  93         if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
  94             OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
  95                                  "%s odls:alps: alps_app_lli_get_response returned %d",
  96                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
  97             ret = ORTE_ERR_FILE_READ_FAILURE;
  98             goto fn_exit;
  99         }
 100 
 101         ret = alps_app_lli_get_response_bytes (&apid, sizeof(apid));
 102         if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
 103             OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
 104                                  "%s odls:alps: alps_app_lli_get_response_bytes returned %d",
 105                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
 106             ret = ORTE_ERR_FILE_READ_FAILURE;
 107             goto fn_exit;
 108         }
 109 
 110         /*
 111          * now get the GNI rdma credentials info
 112          */
 113 
 114         ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_GNI, NULL, 0);
 115         if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
 116             OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
 117                                  "%s odls:alps: alps_app_lli_put_request returned %d",
 118                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
 119             ret = ORTE_ERR_FILE_WRITE_FAILURE;
 120             goto fn_exit;
 121         }
 122 
 123         ret = alps_app_lli_get_response(&alps_status, &alps_count);
 124         if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
 125             OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
 126                                  "%s odls:alps: alps_app_lli_get_response returned %d",
 127                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
 128             ret = ORTE_ERR_FILE_READ_FAILURE;
 129             goto fn_exit;
 130         }
 131 
 132         rdmacred_rsp = (alpsAppLLIGni_t *)malloc(alps_count);
 133         if (NULL == rdmacred_rsp) {
 134             ret = ORTE_ERR_OUT_OF_RESOURCE;
 135             goto fn_exit;
 136         }
 137 
 138         memset(rdmacred_rsp,0,alps_count);
 139 
 140         ret = alps_app_lli_get_response_bytes(rdmacred_rsp, alps_count);
 141         if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
 142             OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
 143                                  "%s odls:alps: alps_app_lli_get_response_bytes returned %d",
 144                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
 145             free(rdmacred_rsp);
 146             ret = ORTE_ERR_FILE_READ_FAILURE;
 147             goto fn_exit;
 148         }
 149 
 150         ret = alps_app_lli_unlock();
 151 
 152         rdmacred_buf = (alpsAppGni_t *)(rdmacred_rsp->u.buf);
 153 
 154         /*
 155          * now set up the env. variables -
 156          * The cray pmi sets up 4 environment variables:
 157          * PMI_GNI_DEV_ID - format (id0:id1....idX)
 158          * PMI_GNI_LOC_ADDR - format (locaddr0:locaddr1:....locaddrX)
 159          * PMI_GNI_COOKIE - format (cookie0:cookie1:...cookieX)
 160          * PMI_GNI_PTAG - format (ptag0:ptag1:....ptagX)
 161          *
 162          * where X == num_creds - 1
 163          *
 164          * TODO: need in theory to check for possible overrun of env_buffer
 165          */
 166 
 167         num_creds = rdmacred_rsp->count;
 168 
 169         /*
 170          * first build ptag env
 171          */
 172 
 173         memset(env_buffer,0,sizeof(env_buffer));
 174         ptr = env_buffer;
 175         for (i=0; i<num_creds-1; i++) {
 176             len = sprintf(ptr,"%d:",rdmacred_buf[i].ptag);
 177             ptr += len;
 178         }
 179         sprintf(ptr,"%d",rdmacred_buf[num_creds-1].ptag);
 180         ret = opal_setenv("PMI_GNI_PTAG", env_buffer, false, &orte_launch_environ);
 181         if (ret != ORTE_SUCCESS) {
 182             OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
 183                                  "%s odls:alps: opal_setenv for PMI_GNI_TAG returned %d",
 184                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
 185             goto fn_exit;
 186         }
 187 
 188         /*
 189          * the cookie env
 190          */
 191 
 192         memset(env_buffer,0,sizeof(env_buffer));
 193         ptr = env_buffer;
 194         for (i=0; i<num_creds-1; i++) {
 195             len = sprintf(ptr,"%d:",rdmacred_buf[i].cookie);
 196             ptr += len;
 197         }
 198         sprintf(ptr,"%d",rdmacred_buf[num_creds-1].cookie);
 199         ret = opal_setenv("PMI_GNI_COOKIE", env_buffer, false, &orte_launch_environ);
 200         if (ret != ORTE_SUCCESS) {
 201             OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
 202                                  "%s odls:alps: opal_setenv for PMI_GNI_COOKIE returned %d",
 203                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
 204             goto fn_exit;
 205         }
 206 
 207         /*
 208          * nic loc addrs
 209          */
 210 
 211         memset(env_buffer,0,sizeof(env_buffer));
 212         ptr = env_buffer;
 213         for (i=0; i<num_creds-1; i++) {
 214             len = sprintf(ptr,"%d:",rdmacred_buf[i].local_addr);
 215             ptr += len;
 216         }
 217         sprintf(ptr,"%d",rdmacred_buf[num_creds-1].local_addr);
 218         ret = opal_setenv("PMI_GNI_LOC_ADDR", env_buffer, false, &orte_launch_environ);
 219         if (ret != ORTE_SUCCESS) {
 220             OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
 221                                  "%s odls:alps: opal_setenv for PMI_GNI_LOC_ADDR returned %d",
 222                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
 223             goto fn_exit;
 224         }
 225 
 226         /*
 227          * finally device ids
 228          */
 229 
 230         memset(env_buffer,0,sizeof(env_buffer));
 231         ptr = env_buffer;
 232         for (i=0; i<num_creds-1; i++) {
 233             len = sprintf(ptr,"%d:",rdmacred_buf[i].device_id);
 234             ptr += len;
 235         }
 236         sprintf(ptr,"%d",rdmacred_buf[num_creds-1].device_id);
 237         ret = opal_setenv("PMI_GNI_DEV_ID", env_buffer, false, &orte_launch_environ);
 238         if (ret != ORTE_SUCCESS) {
 239             OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
 240                                  "%s odls:alps: opal_setenv for PMI_GNI_DEV_ID returned %d",
 241                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
 242             goto fn_exit;
 243         }
 244 
 245     }
 246 
 247    fn_exit:
 248     if (ORTE_SUCCESS == ret) already_got_creds = 1;
 249     return ret;
 250 }
 251 
 252 

/* [<][>][^][v][top][bottom][index][help] */