root/opal/mca/pmix/cray/pmix_cray_component.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. pmix_cray_component_open
  2. pmix_cray_component_query
  3. pmix_cray_component_close

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2014      Intel, Inc.  All rights reserved.
   4  * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
   5  *                         reserved.
   6  * Copyright (c) 2016 Cisco Systems, Inc.  All rights reserved.
   7  * $COPYRIGHT$
   8  *
   9  * Additional copyrights may follow
  10  *
  11  * $HEADER$
  12  *
  13  * These symbols are in a file by themselves to provide nice linker
  14  * semantics.  Since linkers generally pull in symbols by object
  15  * files, keeping these symbols as the only symbols in this file
  16  * prevents utility programs such as "ompi_info" from having to import
  17  * entire components just to query their version and parameters.
  18  */
  19 
  20 #include "opal_config.h"
  21 
  22 #include "opal/constants.h"
  23 #include "opal/mca/pmix/pmix.h"
  24 #include "opal/util/show_help.h"
  25 #include "pmix_cray.h"
  26 #include <sys/syscall.h>
  27 #include <pmi.h>
  28 
  29 /*
  30  * Public string showing the pmix cray component version number
  31  */
  32 const char *opal_pmix_cray_component_version_string =
  33     "OPAL cray pmix MCA component version " OPAL_VERSION;
  34 
  35 /*
  36  * Local function
  37  */
  38 static int pmix_cray_component_open(void);
  39 static int pmix_cray_component_query(mca_base_module_t **module, int *priority);
  40 static int pmix_cray_component_close(void);
  41 
  42 
  43 /*
  44  * Instantiate the public struct with all of our public information
  45  * and pointers to our public functions in it
  46  */
  47 
  48 opal_pmix_cray_component_t mca_pmix_cray_component = {
  49     {
  50     /* First, the mca_component_t struct containing meta information
  51        about the component itself */
  52 
  53         .base_version = {
  54             /* Indicate that we are a pmix v1.1.0 component (which also
  55                implies a specific MCA version) */
  56 
  57             OPAL_PMIX_BASE_VERSION_2_0_0,
  58 
  59             /* Component name and version */
  60 
  61             .mca_component_name = "cray",
  62             MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
  63                                   OPAL_RELEASE_VERSION),
  64 
  65             /* Component open and close functions */
  66 
  67             .mca_open_component = pmix_cray_component_open,
  68             .mca_close_component = pmix_cray_component_close,
  69             .mca_query_component = pmix_cray_component_query,
  70         },
  71         /* Next the MCA v1.0.0 component meta data */
  72         .base_data = {
  73             /* The component is checkpoint ready */
  74             MCA_BASE_METADATA_PARAM_CHECKPOINT
  75         }
  76     },
  77     .cache_local = NULL,
  78     .cache_global = NULL,
  79 };
  80 
  81 static int pmix_cray_component_open(void)
  82 {
  83     /*
  84      * Turns out that there's a lot of reliance on libevent
  85      * and the default behavior of Cray PMI to fork
  86      * in a constructor breaks libevent.
  87      *
  88      * Open MPI will not launch correctly on Cray XE/XC systems
  89      * under these conditions:
  90      *
  91      * 1) direct launch using aprun, and
  92      * 2) PMI_NO_FORK env. variable is not set, nor was
  93      * 3) --disable-dlopen used as part of configury
  94      *
  95      * Under SLURM, PMI_NO_FORK is always set, so we can combine
  96      * the check for conditions 1) and 2) together
  97      */
  98 
  99 #if OPAL_ENABLE_DLOPEN_SUPPORT
 100     if (NULL == getenv("PMI_NO_FORK")) {
 101         opal_show_help("help-pmix-cray.txt", "aprun-not-supported", true);
 102         exit(-1);
 103     }
 104 #endif
 105     return OPAL_SUCCESS;
 106 }
 107 
 108 static int pmix_cray_component_query(mca_base_module_t **module, int *priority)
 109 {
 110     int rc;
 111     const char proc_job_file[]="/proc/job";
 112     FILE *fd = NULL, *fd_task_is_app = NULL;
 113     char task_is_app_fname[PATH_MAX];
 114 
 115     /* disqualify ourselves if not running in a Cray PAGG container, or we
 116        were launched by the orte/mpirun launcher */
 117     fd = fopen(proc_job_file, "r");
 118     if ((fd == NULL) || (getenv("OMPI_NO_USE_CRAY_PMI") != NULL)) {
 119         *priority = 0;
 120         *module = NULL;
 121         rc = OPAL_ERROR;
 122     } else {
 123         snprintf(task_is_app_fname,sizeof(task_is_app_fname),
 124                  "/proc/self/task/%ld/task_is_app",syscall(SYS_gettid));
 125         fd_task_is_app = fopen(task_is_app_fname, "r");
 126         if (fd_task_is_app != NULL) {   /* okay we're in a PAGG container,
 127                                            and we are an app task (not just a process
 128                                            running on a mom node, for example),
 129                                            so we should give cray pmi a shot. */
 130             *priority = 90;
 131             *module = (mca_base_module_t *)&opal_pmix_cray_module;
 132             fclose(fd_task_is_app);
 133             rc = OPAL_SUCCESS;
 134         }
 135         fclose(fd);
 136     }
 137 
 138     return rc;
 139 }
 140 
 141 static int pmix_cray_component_close(void)
 142 {
 143     return OPAL_SUCCESS;
 144 }

/* [<][>][^][v][top][bottom][index][help] */