root/orte/runtime/orte_init.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. _process_name_print_for_opal
  2. _jobid_print_for_opal
  3. _vpid_print_for_opal
  4. _process_name_compare
  5. _convert_string_to_process_name
  6. _convert_process_name_to_string
  7. _convert_string_to_jobid
  8. orte_init

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2005 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2006-2018 Los Alamos National Security, LLC.  All rights
  14  *                         reserved.
  15  * Copyright (c) 2007-2012 Cisco Systems, Inc.  All rights reserved.
  16  * Copyright (c) 2007-2008 Sun Microsystems, Inc.  All rights reserved.
  17  * Copyright (c) 2014-2018 Intel, Inc.  All rights reserved.
  18  * Copyright (c) 2014-2016 Research Organization for Information Science
  19  *                         and Technology (RIST). All rights reserved.
  20  *
  21  * $COPYRIGHT$
  22  *
  23  * Additional copyrights may follow
  24  *
  25  * $HEADER$
  26  */
  27 
  28 /** @file **/
  29 
  30 #include "orte_config.h"
  31 #include "orte/constants.h"
  32 
  33 #include <sys/types.h>
  34 #ifdef HAVE_UNISTD_H
  35 #include <unistd.h>
  36 #endif
  37 
  38 #include "opal/util/error.h"
  39 #include "opal/util/output.h"
  40 #include "opal/util/proc.h"
  41 #include "opal/util/timings.h"
  42 #include "opal/runtime/opal.h"
  43 #include "opal/threads/threads.h"
  44 
  45 #include "orte/util/show_help.h"
  46 #include "orte/mca/ess/base/base.h"
  47 #include "orte/mca/ess/ess.h"
  48 #include "orte/mca/errmgr/errmgr.h"
  49 #include "orte/mca/schizo/base/base.h"
  50 #include "orte/util/listener.h"
  51 #include "orte/util/name_fns.h"
  52 #include "orte/util/proc_info.h"
  53 #include "orte/util/error_strings.h"
  54 #include "orte/orted/pmix/pmix_server.h"
  55 
  56 #include "orte/runtime/runtime.h"
  57 #include "orte/runtime/orte_globals.h"
  58 #include "orte/runtime/orte_locks.h"
  59 
  60 /**
  61  * Static functions used to configure the interactions between the OPAL layer
  62  * and the ORTE runtime; orte_init() installs them as OPAL's callbacks.
  63  */
  64 
  65 /* Name-print hook installed as opal_process_name_print by orte_init(). */
  66 static char *_process_name_print_for_opal(const opal_process_name_t procname)
  67 {
  68     /* ORTE_NAME_PRINT is given a pointer, so pass the address of our by-value copy */
  69     return ORTE_NAME_PRINT((orte_process_name_t *)&procname);
  70 }
  71 
  72 /* Jobid-print hook installed as opal_jobid_print; defers to ORTE_JOBID_PRINT. */
  73 static char *_jobid_print_for_opal(const opal_jobid_t jobid)
  74 {
  75     return ORTE_JOBID_PRINT(jobid);
  76 }
  77 
  78 /* Vpid-print hook installed as opal_vpid_print; defers to ORTE_VPID_PRINT. */
  79 static char *_vpid_print_for_opal(const opal_vpid_t vpid)
  80 {
  81     return ORTE_VPID_PRINT(vpid);
  82 }
  83 
  84 /* Hook installed as opal_compare_proc; delegates with ORTE_NS_CMP_ALL. */
  85 static int _process_name_compare(const opal_process_name_t p1, const opal_process_name_t p2)
  86 {
  87     return orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &p1, &p2);
  88 }
  89 
  90 /* String-to-name hook for OPAL; forwards both arguments unchanged. */
  91 static int _convert_string_to_process_name(opal_process_name_t *name, const char *name_string)
  92 {
  93     return orte_util_convert_string_to_process_name(name, name_string);
  94 }
  95 
  96 /* Name-to-string hook for OPAL; forwards both arguments unchanged. */
  97 static int _convert_process_name_to_string(char **name_string, const opal_process_name_t *name)
  98 {
  99     return orte_util_convert_process_name_to_string(name_string, name);
 100 }
 101 
 102 /* String-to-jobid hook for OPAL; forwards both arguments unchanged. */
 103 static int _convert_string_to_jobid(opal_jobid_t *jobid, const char *jobid_string)
 104 {
 105     return orte_util_convert_string_to_jobid(jobid, jobid_string);
 106 }
 107 /* Global ORTE state. The first two record whether we have completed
 108  * orte_init (a call counter, see orte_init) or are in orte_finalize.
 109  */
 110 int orte_initialized = 0;
 111 bool orte_finalizing = false;
 112 bool orte_debug_flag = false;
 113 int orte_debug_verbosity = -1;
 114 char *orte_prohibited_session_dirs = NULL;
 115 bool orte_create_session_dirs = true;
 116 opal_event_base_t *orte_event_base = NULL;   /* set by orte_init for non-app procs */
 117 bool orte_event_base_active = true;
 118 bool orte_proc_is_bound = false;
 119 int orte_progress_thread_debug = -1;
 120 hwloc_cpuset_t orte_proc_applied_binding = NULL;
 121 /* Predefined process names built from the wildcard / invalid constants */
 122 orte_process_name_t orte_name_wildcard = {ORTE_JOBID_WILDCARD, ORTE_VPID_WILDCARD};
 123
 124 orte_process_name_t orte_name_invalid = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID};
 125 
 126 
 /*
  * Identification string: passed to "#pragma ident" or "#ident" when the
  * corresponding OPAL_CC_USE_* configuration macro is non-zero (neither
  * directive is emitted otherwise), and always stored in the
  * orte_version_string array.
  */
 127 #if OPAL_CC_USE_PRAGMA_IDENT
 128 #pragma ident ORTE_IDENT_STRING
 129 #elif OPAL_CC_USE_IDENT
 130 #ident ORTE_IDENT_STRING
 131 #endif
 132 const char orte_version_string[] = ORTE_IDENT_STRING;
 133 
 /**
  * Initialize the ORTE runtime for the calling process.
  *
  * Calls are counted in orte_initialized: only the first call performs the
  * setup below; later calls just bump the counter and return ORTE_SUCCESS.
  * The first call installs the ORTE naming callbacks into OPAL, runs
  * opal_init(), records the process type, then brings up the locks, the MCA
  * parameters, show_help, the process info, the SCHIZO and ESS frameworks,
  * the selected ESS module and - for HNPs and daemons - the listeners.
  *
  * @param pargc  forwarded unchanged to opal_init()
  * @param pargv  forwarded unchanged to opal_init()
  * @param flags  process type; stored in orte_process_info.proc_type
  *
  * @return ORTE_SUCCESS, or the error code of the first step that failed.
  *         Unless that code is ORTE_ERR_SILENT, the failure is reported
  *         through orte_show_help() together with the name of the step.
  *
  * NOTE(review): orte_initialized is incremented before any step runs and
  * is not decremented on the error path, so a call made after a failed
  * init takes the "already initialized" shortcut - confirm this is intended.
  */
 134 int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
 135 {
 136     int ret;
 137     const char *error = NULL;   /* name of the failing step, for the help message */
 138     OPAL_TIMING_ENV_INIT(tmng);
 139
 140     if (0 < orte_initialized) {
 141         /* track number of times we have been called */
 142         orte_initialized++;
 143         return ORTE_SUCCESS;
 144     }
 145     orte_initialized++;
 146
 147     /* Convince OPAL to use our naming scheme */
 148     opal_process_name_print = _process_name_print_for_opal;
 149     opal_vpid_print = _vpid_print_for_opal;
 150     opal_jobid_print = _jobid_print_for_opal;
 151     opal_compare_proc = _process_name_compare;
 152     opal_convert_string_to_process_name = _convert_string_to_process_name;
 153     opal_convert_process_name_to_string = _convert_process_name_to_string;
 154     opal_snprintf_jobid = orte_util_snprintf_jobid;
 155     opal_convert_string_to_jobid = _convert_string_to_jobid;
 156
 157     OPAL_TIMING_ENV_NEXT(tmng, "initializations");
 158
 159     /* initialize the opal layer */
 160     if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
 161         error = "opal_init";
 162         goto error;
 163     }
 164
 165     OPAL_TIMING_ENV_NEXT(tmng, "opal_init");
 166
 167     /* ensure we know the type of proc for when we finalize */
 168     orte_process_info.proc_type = flags;
 169
 170     /* setup the locks */
 171     if (ORTE_SUCCESS != (ret = orte_locks_init())) {
 172         error = "orte_locks_init";
 173         goto error;
 174     }
 175
 176     OPAL_TIMING_ENV_NEXT(tmng, "orte_locks_init");
 177
 178     /* Register all MCA Params */
 179     if (ORTE_SUCCESS != (ret = orte_register_params())) {
 180         error = "orte_register_params";
 181         goto error;
 182     }
 183
 184     OPAL_TIMING_ENV_NEXT(tmng, "orte_register_params");
 185
 186     /* setup the orte_show_help system */
 187     if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
 188         error = "orte_show_help_init";
 189         goto error;
 190     }
 191
 192     OPAL_TIMING_ENV_NEXT(tmng, "orte_show_help_init");
 193
 194     /* register handler for errnum -> string conversion */
 195     opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);
 196
 197     OPAL_TIMING_ENV_NEXT(tmng, "opal_error_register");
 198
 199     /* Ensure the rest of the process info structure is initialized */
 200     if (ORTE_SUCCESS != (ret = orte_proc_info())) {
 201         error = "orte_proc_info";
 202         goto error;
 203     }
 204
 205     OPAL_TIMING_ENV_NEXT(tmng, "orte_proc_info");
 206
 207     /* we may have modified the local nodename according to
 208      * request to retain/strip the FQDN and prefix, so update
 209      * it here. The OPAL layer will strdup the hostname, so
 210      * we have to free it first to avoid a memory leak */
 211     /* free(NULL) is a no-op, so no NULL guard is needed around
 212      * the release of the previous value */
 213     free(opal_process_info.nodename);
 214     /* opal_finalize_util will call free on this pointer so set from strdup */
 215     opal_process_info.nodename = strdup (orte_process_info.nodename);
 216
 217     if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
 218         /* let the pmix server register params */
 219         pmix_server_register_params();
 220         opal_set_using_threads (true);
 221         OPAL_TIMING_ENV_NEXT(tmng, "pmix_server_register_params");
 222     }
 223
 224     /* open the SCHIZO framework as everyone needs it, and the
 225      * ess will use it to help select its component */
 226     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
 227         ORTE_ERROR_LOG(ret);
 228         error = "orte_schizo_base_open";
 229         goto error;
 230     }
 231
 232     OPAL_TIMING_ENV_NEXT(tmng, "framework_open(schizo)");
 233
 234     if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) {
 235         error = "orte_schizo_base_select";
 236         goto error;
 237     }
 238
 239     OPAL_TIMING_ENV_NEXT(tmng, "orte_schizo_base_select");
 240
 241     /* if we are an app, let SCHIZO help us determine our environment */
 242     if (ORTE_PROC_IS_APP) {
 243         (void)orte_schizo.check_launch_environment();
 244         OPAL_TIMING_ENV_NEXT(tmng, "orte_schizo.check_launch_environment");
 245     }
 246
 247     /* open the ESS and select the correct module for this environment */
 248     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) {
 249         ORTE_ERROR_LOG(ret);
 250         error = "orte_ess_base_open";
 251         goto error;
 252     }
 253
 254     OPAL_TIMING_ENV_NEXT(tmng, "framework_open(ess)");
 255
 256     if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
 257         error = "orte_ess_base_select";
 258         goto error;
 259     }
 260
 261     OPAL_TIMING_ENV_NEXT(tmng, "orte_ess_base_select");
 262
 263     if (!ORTE_PROC_IS_APP) {
 264         /* ORTE tools "block" in their own loop over the event
 265          * base, so no progress thread is required - apps will
 266          * start their progress thread in ess_base_std_app.c
 267          * at the appropriate point
 268          */
 269         orte_event_base = opal_sync_event_base;
 270     }
 271
 272     /* initialize the RTE for this environment */
 273     if (ORTE_SUCCESS != (ret = orte_ess.init())) {
 274         error = "orte_ess_init";
 275         goto error;
 276     }
 277
 278     OPAL_TIMING_ENV_NEXT(tmng, "orte_ess.init");
 279
 280     /* set the remaining opal_process_info fields. Note that
 281      * the OPAL layer will have initialized these to NULL, and
 282      * anyone between us would not have strdup'd the string, so
 283      * we cannot free it here */
 284     opal_process_info.job_session_dir  = orte_process_info.job_session_dir;
 285     opal_process_info.proc_session_dir = orte_process_info.proc_session_dir;
 286     opal_process_info.num_local_peers  = (int32_t)orte_process_info.num_local_peers;
 287     opal_process_info.my_local_rank    = (int32_t)orte_process_info.my_local_rank;
 288     opal_process_info.cpuset           = orte_process_info.cpuset;
 289
 290     if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
 291         /* start listening - will be ignored if no listeners
 292          * were registered */
 293         if (ORTE_SUCCESS != (ret = orte_start_listening())) {
 294             ORTE_ERROR_LOG(ret);
 295             error = "orte_start_listening";
 296             goto error;
 297         }
 298     }
 299
 300     OPAL_TIMING_ENV_NEXT(tmng, "finalize");
 301     /* All done */
 302     return ORTE_SUCCESS;
 303
 304  error:
 305     if (ORTE_ERR_SILENT != ret) {
 306         orte_show_help("help-orte-runtime",
 307                        "orte_init:startup:internal-failure",
 308                        true, error, ORTE_ERROR_NAME(ret), ret);
 309     }
 310
 311     return ret;
 312 }

/* [<][>][^][v][top][bottom][index][help] */