root/orte/runtime/orte_mca_params.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. orte_register_params

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2008 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2007-2011 Cisco Systems, Inc.  All rights reserved.
  13  * Copyright (c) 2009-2010 Oracle and/or its affiliates.  All rights reserved.
  14  * Copyright (c) 2012-2013 Los Alamos National Security, LLC.
  15  *                         All rights reserved
  16  * Copyright (c) 2013-2019 Intel, Inc.  All rights reserved.
  17  * Copyright (c) 2014-2018 Research Organization for Information Science
  18  *                         and Technology (RIST).  All rights reserved.
  19  * Copyright (c) 2017      IBM Corporation.  All rights reserved.
  20  * $COPYRIGHT$
  21  *
  22  * Additional copyrights may follow
  23  *
  24  * $HEADER$
  25  */
  26 
  27 #include "orte_config.h"
  28 #include "orte/constants.h"
  29 #include "orte/types.h"
  30 
  31 #ifdef HAVE_SYS_TIME_H
  32 #include <sys/time.h>
  33 #endif
  34 #include <stdio.h>
  35 
  36 #include "opal/mca/base/mca_base_var.h"
  37 #include "opal/mca/installdirs/installdirs.h"
  38 #include "opal/util/output.h"
  39 #include "opal/util/argv.h"
  40 #include "opal/util/printf.h"
  41 
  42 #include "orte/util/proc_info.h"
  43 #include "orte/mca/errmgr/errmgr.h"
  44 
  45 #include "orte/runtime/runtime.h"
  46 #include "orte/runtime/orte_globals.h"
  47 
  48 static bool passed_thru = false;  /* set on the first call to orte_register_params(); later calls return ORTE_SUCCESS immediately */
  49 static int orte_progress_thread_debug_level = -1;  /* storage for the "progress_thread_debug" var; a value >= 0 opens an output stream set to that verbosity */
  50 static char *orte_xml_file = NULL;  /* storage for the "xml_file" var; when set, the HNP opens it for writing as orte_xml_fp */
  51 static char *orte_fork_agent_string = NULL;  /* storage for the "fork_agent" var; when set, split on ' ' into orte_fork_agent */
  52 static char *orte_tmpdir_base = NULL;  /* storage for the "tmpdir_base" var; when set, copied into orte_process_info.tmpdir_base */
  53 static char *orte_local_tmpdir_base = NULL;  /* storage for the "local_tmpdir_base" var; copied into orte_process_info.tmpdir_base on the HNP when no global base is set */
  54 static char *orte_remote_tmpdir_base = NULL;  /* storage for the "remote_tmpdir_base" var; copied into orte_process_info.tmpdir_base on daemons when no global base is set */
  55 static char *orte_top_session_dir = NULL;  /* storage for the "top_session_dir" var; when set, copied into orte_process_info.top_session_dir */
  56 static char *orte_jobfam_session_dir = NULL;  /* storage for the "jobfam_session_dir" var; when set, copied into orte_process_info.jobfam_session_dir */
  57 
  58 int orte_register_params(void)
  59 {
  60     int id;
  61     opal_output_stream_t lds;
  62 
  63     /* only go thru this once - mpirun calls it twice, which causes
  64      * any error messages to show up twice
  65      */
  66     if (passed_thru) {
  67         return ORTE_SUCCESS;
  68     }
  69     passed_thru = true;
  70 
  71     /* get a clean output channel too - need to do this here because
  72      * we use it below, and orterun and some other tools call this
  73      * function prior to calling orte_init
  74      */
  75     OBJ_CONSTRUCT(&lds, opal_output_stream_t);
  76     lds.lds_want_stdout = true;
  77     orte_clean_output = opal_output_open(&lds);
  78     OBJ_DESTRUCT(&lds);
  79 
  80     orte_help_want_aggregate = true;
  81     (void) mca_base_var_register ("orte", "orte", "base", "help_aggregate",
  82                                   "If orte_base_help_aggregate is true, duplicate help messages will be aggregated rather than displayed individually.  This can be helpful for parallel jobs that experience multiple identical failures; rather than print out the same help/failure message N times, display it once with a count of how many processes sent the same message.",
  83                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
  84                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
  85                                   &orte_help_want_aggregate);
  86 
  87     /* LOOK FOR A TMP DIRECTORY BASE */
  88     /* Several options are provided to cover a range of possibilities:
  89      *
  90      * (a) all processes need to use a specified location as the base
  91      *     for tmp directories
  92      * (b) daemons on remote nodes need to use a specified location, but
  93      *     one different from that used by mpirun
  94      * (c) mpirun needs to use a specified location, but one different
  95      *     from that used on remote nodes
  96      */
  97     orte_tmpdir_base = NULL;
  98     (void) mca_base_var_register ("orte", "orte", NULL, "tmpdir_base",
  99                                   "Base of the session directory tree to be used by all processes",
 100                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 101                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
 102                                   &orte_tmpdir_base);
 103 
 104     orte_local_tmpdir_base = NULL;
 105     (void) mca_base_var_register ("orte", "orte", NULL, "local_tmpdir_base",
 106                                   "Base of the session directory tree to be used by orterun/mpirun",
 107                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 108                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
 109                                   &orte_local_tmpdir_base);
 110 
 111     orte_remote_tmpdir_base = NULL;
 112     (void) mca_base_var_register ("orte", "orte", NULL, "remote_tmpdir_base",
 113                                   "Base of the session directory tree on remote nodes, if required to be different from head node",
 114                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 115                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
 116                                   &orte_remote_tmpdir_base);
 117 
 118     /* if a global tmpdir was specified, then we do not allow specification
 119      * of the local or remote values to avoid confusion
 120      */
 121     if (NULL != orte_tmpdir_base &&
 122         (NULL != orte_local_tmpdir_base || NULL != orte_remote_tmpdir_base)) {
 123         opal_output(orte_clean_output,
 124                     "------------------------------------------------------------------\n"
 125                     "The MCA param orte_tmpdir_base was specified, which sets the base\n"
 126                     "of the temporary directory tree for all procs. However, values for\n"
 127                     "the local and/or remote tmpdir base were also given. This can lead\n"
 128                     "to confusion and is therefore not allowed. Please specify either a\n"
 129                     "global tmpdir base OR a local/remote tmpdir base value\n"
 130                     "------------------------------------------------------------------");
 131         exit(1);
 132     }
 133 
 134     if (NULL != orte_tmpdir_base) {
 135         if (NULL != orte_process_info.tmpdir_base) {
 136             free(orte_process_info.tmpdir_base);
 137         }
 138         orte_process_info.tmpdir_base = strdup (orte_tmpdir_base);
 139     } else if (ORTE_PROC_IS_HNP && NULL != orte_local_tmpdir_base) {
 140         /* orterun will pickup the value for its own use */
 141         if (NULL != orte_process_info.tmpdir_base) {
 142             free(orte_process_info.tmpdir_base);
 143         }
 144         orte_process_info.tmpdir_base = strdup (orte_local_tmpdir_base);
 145     } else if (ORTE_PROC_IS_DAEMON && NULL != orte_remote_tmpdir_base) {
 146         /* orterun will pickup the value and forward it along, but must not
 147          * use it in its own work. So only a daemon needs to get it, and the
 148          * daemon will pass it down to its application procs. Note that orterun
 149          * will pass -its- value to any procs local to it
 150          */
 151         if (NULL != orte_process_info.tmpdir_base) {
 152             free(orte_process_info.tmpdir_base);
 153         }
 154         orte_process_info.tmpdir_base = strdup (orte_remote_tmpdir_base);
 155     }
 156 
 157     orte_top_session_dir = NULL;
 158     (void) mca_base_var_register ("orte", "orte", NULL, "top_session_dir",
 159                                   "Top of the session directory tree for applications",
 160                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 161                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
 162                                   &orte_top_session_dir);
 163 
 164     if (NULL != orte_top_session_dir) {
 165          if (NULL != orte_process_info.top_session_dir) {
 166             free(orte_process_info.top_session_dir);
 167         }
 168         orte_process_info.top_session_dir = strdup(orte_top_session_dir);
 169     }
 170 
 171     orte_jobfam_session_dir = NULL;
 172     (void) mca_base_var_register ("orte", "orte", NULL, "jobfam_session_dir",
 173                                   "The jobfamily session directory for applications",
 174                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 175                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
 176                                   &orte_jobfam_session_dir);
 177 
 178     if (NULL != orte_jobfam_session_dir) {
 179         if (NULL != orte_process_info.jobfam_session_dir) {
 180             free(orte_process_info.jobfam_session_dir);
 181         }
 182         orte_process_info.jobfam_session_dir = strdup(orte_jobfam_session_dir);
 183     }
 184 
 185     orte_prohibited_session_dirs = NULL;
 186     (void) mca_base_var_register ("orte", "orte", NULL, "no_session_dirs",
 187                                   "Prohibited locations for session directories (multiple locations separated by ',', default=NULL)",
 188                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 189                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
 190                                   &orte_prohibited_session_dirs);
 191 
 192     orte_create_session_dirs = true;
 193     (void) mca_base_var_register ("orte", "orte", NULL, "create_session_dirs",
 194                                   "Create session directories",
 195                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 196                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
 197                                   &orte_create_session_dirs);
 198 
 199     orte_execute_quiet = false;
 200     (void) mca_base_var_register ("orte", "orte", NULL, "execute_quiet",
 201                                   "Do not output error and help messages",
 202                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 203                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
 204                                   &orte_execute_quiet);
 205 
 206     orte_report_silent_errors = false;
 207     (void) mca_base_var_register ("orte", "orte", NULL, "report_silent_errors",
 208                                   "Report all errors, including silent ones",
 209                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 210                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
 211                                   &orte_report_silent_errors);
 212 
 213     orte_debug_flag = false;
 214     (void) mca_base_var_register ("orte", "orte", NULL, "debug",
 215                                   "Top-level ORTE debug switch (default: false)",
 216                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 217                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
 218                                   &orte_debug_flag);
 219 
 220     orte_debug_verbosity = -1;
 221     (void) mca_base_var_register ("orte", "orte", NULL, "debug_verbose",
 222                                   "Verbosity level for ORTE debug messages (default: 1)",
 223                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 224                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
 225                                   &orte_debug_verbosity);
 226 
 227     orte_debug_daemons_file_flag = false;
 228     (void) mca_base_var_register ("orte", "orte", NULL, "debug_daemons_file",
 229                                   "Whether want stdout/stderr of daemons to go to a file or not",
 230                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 231                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
 232                                   &orte_debug_daemons_file_flag);
 233     /* If --debug-daemons-file was specified, that also implies
 234        --debug-daemons */
 235     if (orte_debug_daemons_file_flag) {
 236         orte_debug_daemons_flag = true;
 237 
 238         /* value can't change */
 239         (void) mca_base_var_register ("orte", "orte", NULL, "debug_daemons",
 240                                       "Whether to debug the ORTE daemons or not",
 241                                       MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 242                                       OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_CONSTANT,
 243                                       &orte_debug_daemons_flag);
 244     } else {
 245         orte_debug_daemons_flag = false;
 246 
 247         (void) mca_base_var_register ("orte", "orte", NULL, "debug_daemons",
 248                                       "Whether to debug the ORTE daemons or not",
 249                                       MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 250                                       OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
 251                                       &orte_debug_daemons_flag);
 252     }
 253 
 254     orte_progress_thread_debug_level = -1;
 255     (void) mca_base_var_register ("orte", "orte", NULL, "progress_thread_debug",
 256                                   "Debug level for ORTE progress threads",
 257                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 258                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
 259                                   &orte_progress_thread_debug_level);
 260 
 261     if (0 <= orte_progress_thread_debug_level) {
 262         orte_progress_thread_debug = opal_output_open(NULL);
 263         opal_output_set_verbosity(orte_progress_thread_debug,
 264                                   orte_progress_thread_debug_level);
 265     }
 266 
 267     /* do we want session output left open? */
 268     orte_leave_session_attached = false;
 269     (void) mca_base_var_register ("orte", "orte", NULL, "leave_session_attached",
 270                                   "Whether applications and/or daemons should leave their sessions "
 271                                   "attached so that any output can be received - this allows X forwarding "
 272                                   "without all the attendant debugging output",
 273                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 274                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
 275                                   &orte_leave_session_attached);
 276 
 277     /* if any debug level is set, ensure we output debug level dumps */
 278     if (orte_debug_flag || orte_debug_daemons_flag || orte_leave_session_attached) {
 279         orte_devel_level_output = true;
 280     }
 281 
 282     /* See comment in orte/tools/orterun/orterun.c about this MCA
 283        param (this param is internal) */
 284     orte_in_parallel_debugger = false;
 285     (void) mca_base_var_register ("orte", "orte", NULL, "in_parallel_debugger",
 286                                   "Whether the application is being debugged "
 287                                   "in a parallel debugger (default: false)",
 288                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
 289                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 290                                   &orte_in_parallel_debugger);
 291 
 292     orte_debugger_dump_proctable = false;
 293     (void) mca_base_var_register ("orte", "orte", NULL, "output_debugger_proctable",
 294                                   "Whether or not to output the debugger proctable after launch (default: false)",
 295                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 296                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
 297                                   &orte_debugger_dump_proctable);
 298 
 299     orte_debugger_test_daemon = NULL;
 300     (void) mca_base_var_register ("orte", "orte", NULL, "debugger_test_daemon",
 301                                   "Name of the executable to be used to simulate a debugger colaunch (relative or absolute path)",
 302                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 303                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 304                                   &orte_debugger_test_daemon);
 305 
 306     orte_debugger_test_attach = false;
 307     (void) mca_base_var_register ("orte", "orte", NULL, "debugger_test_attach",
 308                                   "Test debugger colaunch after debugger attachment",
 309                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 310                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 311                                   &orte_debugger_test_attach);
 312 
 313     orte_debugger_check_rate = 0;
 314     (void) mca_base_var_register ("orte", "orte", NULL, "debugger_check_rate",
 315                                   "Set rate (in secs) for auto-detect of debugger attachment (0 => do not check)",
 316                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 317                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 318                                   &orte_debugger_check_rate);
 319 
 320     orte_do_not_launch = false;
 321     (void) mca_base_var_register ("orte", "orte", NULL, "do_not_launch",
 322                                   "Perform all necessary operations to prepare to launch the application, but do not actually launch it",
 323                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 324                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 325                                   &orte_do_not_launch);
 326 
 327     orted_spin_flag = false;
 328     (void) mca_base_var_register ("orte", "orte", NULL, "daemon_spin",
 329                                   "Have any orteds spin until we can connect a debugger to them",
 330                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 331                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 332                                   &orted_spin_flag);
 333 
 334     orted_debug_failure = ORTE_VPID_INVALID;
 335     (void) mca_base_var_register ("orte", "orte", NULL, "daemon_fail",
 336                                   "Have the specified orted fail after init for debugging purposes",
 337                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 338                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 339                                   &orted_debug_failure);
 340 
 341     orted_debug_failure_delay = 0;
 342     (void) mca_base_var_register ("orte", "orte", NULL, "daemon_fail_delay",
 343                                   "Have the specified orted fail after specified number of seconds (default: 0 => no delay)",
 344                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 345                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 346                                   &orted_debug_failure_delay);
 347 
 348     orte_startup_timeout = 0;
 349     (void) mca_base_var_register ("orte", "orte", NULL, "startup_timeout",
 350                                   "Seconds to wait for startup or job launch before declaring failed_to_start (default: 0 => do not check)",
 351                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 352                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 353                                   &orte_startup_timeout);
 354 
 355     /* User-level debugger info string */
 356     orte_base_user_debugger = "totalview @mpirun@ -a @mpirun_args@ : ddt -n @np@ -start @executable@ @executable_argv@ @single_app@ : fxp @mpirun@ -a @mpirun_args@";
 357     (void) mca_base_var_register ("orte", "orte", NULL, "base_user_debugger",
 358                                   "Sequence of user-level debuggers to search for in orterun",
 359                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 360                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 361                                   &orte_base_user_debugger);
 362 
 363 #if 0
 364     mca_base_param_reg_int_name("orte", "abort_timeout",
 365                                 "Max time to wait [in secs] before aborting an ORTE operation (default: 1sec)",
 366                                 false, false, 1, &value);
 367     orte_max_timeout = 1000000.0 * value;  /* convert to usec */
 368 
 369     mca_base_param_reg_int_name("orte", "timeout_step",
 370                                 "Time to wait [in usecs/proc] before aborting an ORTE operation (default: 1000 usec/proc)",
 371                                 false, false, 1000, &orte_timeout_usec_per_proc);
 372 #endif
 373 
 374     /* default hostfile */
 375     orte_default_hostfile = NULL;
 376     (void) mca_base_var_register ("orte", "orte", NULL, "default_hostfile",
 377                                   "Name of the default hostfile (relative or absolute path, \"none\" to ignore environmental or default MCA param setting)",
 378                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 379                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 380                                   &orte_default_hostfile);
 381 
 382     if (NULL == orte_default_hostfile) {
 383         /* nothing was given, so define the default */
 384         opal_asprintf(&orte_default_hostfile, "%s/openmpi-default-hostfile", opal_install_dirs.sysconfdir);
 385         /* flag that nothing was given */
 386         orte_default_hostfile_given = false;
 387     } else if (0 == strcmp(orte_default_hostfile, "none")) {
 388         free (orte_default_hostfile);
 389         orte_default_hostfile = NULL;
 390         /* flag that it was given */
 391         orte_default_hostfile_given = true;
 392     } else {
 393         /* flag that it was given */
 394         orte_default_hostfile_given = true;
 395     }
 396 
 397     /* default dash-host */
 398     orte_default_dash_host = NULL;
 399     (void) mca_base_var_register ("orte", "orte", NULL, "default_dash_host",
 400                                   "Default -host setting (specify \"none\" to ignore environmental or default MCA param setting)",
 401                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 402                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 403                                   &orte_default_dash_host);
 404     if (NULL != orte_default_dash_host &&
 405         0 == strcmp(orte_default_dash_host, "none")) {
 406         free(orte_default_dash_host);
 407         orte_default_dash_host = NULL;
 408     }
 409 
 410     /* whether or not to keep FQDN hostnames */
 411     orte_keep_fqdn_hostnames = false;
 412     (void) mca_base_var_register ("orte", "orte", NULL, "keep_fqdn_hostnames",
 413                                   "Whether or not to keep FQDN hostnames [default: no]",
 414                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 415                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 416                                   &orte_keep_fqdn_hostnames);
 417 
 418     /* whether or not to retain aliases of hostnames */
 419     orte_retain_aliases = false;
 420     (void) mca_base_var_register ("orte", "orte", NULL, "retain_aliases",
 421                                   "Whether or not to keep aliases for host names [default: no]",
 422                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 423                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 424                                   &orte_retain_aliases);
 425 
 426     orte_hostname_cutoff = 1000;
 427     (void) mca_base_var_register ("orte", "orte", NULL, "hostname_cutoff",
 428                                   "Pass hostnames to all procs when #nodes is less than cutoff [default:1000]",
 429                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 430                                   OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_READONLY,
 431                                   &orte_hostname_cutoff);
 432 
 433     /* which alias to use in MPIR_proctab */
 434     orte_use_hostname_alias = 1;
 435     (void) mca_base_var_register ("orte", "orte", NULL, "hostname_alias_index",
 436                                   "If hostname aliases are being retained, which one to use for the debugger proc table [default: 1st alias]",
 437                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 438                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 439                                   &orte_use_hostname_alias);
 440 
 441     orte_xml_output = false;
 442     (void) mca_base_var_register ("orte", "orte", NULL, "xml_output",
 443                                   "Display all output in XML format (default: false)",
 444                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 445                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 446                                   &orte_xml_output);
 447 
 448     /* whether to tag output */
 449     /* if we requested xml output, be sure to tag the output as well */
 450     orte_tag_output = orte_xml_output;
 451     (void) mca_base_var_register ("orte", "orte", NULL, "tag_output",
 452                                   "Tag all output with [job,rank] (default: false)",
 453                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 454                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 455                                   &orte_tag_output);
 456     if (orte_xml_output) {
 457         orte_tag_output = true;
 458     }
 459 
 460 
 461     orte_xml_file = NULL;
 462     (void) mca_base_var_register ("orte", "orte", NULL, "xml_file",
 463                                   "Provide all output in XML format to the specified file",
 464                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 465                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 466                                   &orte_xml_file);
 467     if (NULL != orte_xml_file) {
 468         if (ORTE_PROC_IS_HNP && NULL == orte_xml_fp) {
 469             /* only the HNP opens this file! Make sure it only happens once */
 470             orte_xml_fp = fopen(orte_xml_file, "w");
 471             if (NULL == orte_xml_fp) {
 472                 opal_output(0, "Could not open specified xml output file: %s", orte_xml_file);
 473                 return ORTE_ERROR;
 474             }
 475         }
 476         /* ensure we set the flags to tag output */
 477         orte_xml_output = true;
 478         orte_tag_output = true;
 479     } else {
 480         /* default to stdout */
 481         orte_xml_fp = stdout;
 482     }
 483 
 484     /* whether to timestamp output */
 485     orte_timestamp_output = false;
 486     (void) mca_base_var_register ("orte", "orte", NULL, "timestamp_output",
 487                                   "Timestamp all application process output (default: false)",
 488                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 489                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 490                                   &orte_timestamp_output);
 491 
 492     orte_show_resolved_nodenames = false;
 493     (void) mca_base_var_register ("orte", "orte", NULL, "show_resolved_nodenames",
 494                                   "Display any node names that are resolved to a different name (default: false)",
 495                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 496                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 497                                   &orte_show_resolved_nodenames);
 498 
 499     /* allow specification of the launch agent */
 500     orte_launch_agent = "orted";
 501     (void) mca_base_var_register ("orte", "orte", NULL, "launch_agent",
 502                                   "Command used to start processes on remote nodes (default: orted)",
 503                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 504                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 505                                   &orte_launch_agent);
 506 
 507     orte_fork_agent_string = NULL;
 508     (void) mca_base_var_register ("orte", "orte", NULL, "fork_agent",
 509                                   "Command used to fork processes on remote nodes (default: NULL)",
 510                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 511                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 512                                   &orte_fork_agent_string);
 513 
 514     if (NULL != orte_fork_agent_string) {
 515         orte_fork_agent = opal_argv_split(orte_fork_agent_string, ' ');
 516     }
 517 
 518     /* whether or not to require RM allocation */
 519     orte_allocation_required = false;
 520     (void) mca_base_var_register ("orte", "orte", NULL, "allocation_required",
 521                                   "Whether or not an allocation by a resource manager is required [default: no]",
 522                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 523                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 524                                   &orte_allocation_required);
 525 
 526     /* whether or not to map stddiag to stderr */
 527     orte_map_stddiag_to_stderr = false;
 528     (void) mca_base_var_register ("orte", "orte", NULL, "map_stddiag_to_stderr",
 529                                   "Map output from opal_output to stderr of the local process [default: no]",
 530                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 531                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 532                                   &orte_map_stddiag_to_stderr);
 533 
  534     /* whether or not to map stddiag to stdout */
 535     orte_map_stddiag_to_stdout = false;
 536     (void) mca_base_var_register ("orte", "orte", NULL, "map_stddiag_to_stdout",
 537                                   "Map output from opal_output to stdout of the local process [default: no]",
 538                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 539                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 540                                   &orte_map_stddiag_to_stdout);
 541     if( orte_map_stddiag_to_stderr && orte_map_stddiag_to_stdout ) {
 542         opal_output(0, "The options \"orte_map_stddiag_to_stderr\" and \"orte_map_stddiag_to_stdout\" are mutually exclusive. They cannot both be set to true.");
 543         return ORTE_ERROR;
 544     }
 545 
 546     /* generate new terminal windows to display output from specified ranks */
 547     orte_xterm = NULL;
 548     (void) mca_base_var_register ("orte", "orte", NULL, "xterm",
 549                                   "Create a new xterm window and display output from the specified ranks there [default: none]",
 550                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 551                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 552                                   &orte_xterm);
 553     if (NULL != orte_xterm) {
 554         /* if an xterm request is given, we have to leave any ssh
 555          * sessions attached so the xterm window manager can get
 556          * back to the controlling terminal
 557          */
 558         orte_leave_session_attached = true;
 559         /* also want to redirect stddiag output from opal_output
 560          * to stderr from the process so those messages show
 561          * up in the xterm window instead of being forwarded to mpirun
 562          */
 563         orte_map_stddiag_to_stderr = true;
 564     }
 565 
 566     /* whether or not to report launch progress */
 567     orte_report_launch_progress = false;
 568     (void) mca_base_var_register ("orte", "orte", NULL, "report_launch_progress",
 569                                   "Output a brief periodic report on launch progress [default: no]",
 570                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 571                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 572                                   &orte_report_launch_progress);
 573 
 574     /* cluster hardware info detected by orte only */
 575     orte_local_cpu_type = NULL;
 576     (void) mca_base_var_register ("orte", "orte", NULL, "cpu_type",
 577                                   "cpu type detected in node",
 578                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
 579                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 580                                   &orte_local_cpu_type);
 581 
 582     orte_local_cpu_model = NULL;
 583     (void) mca_base_var_register ("orte", "orte", NULL, "cpu_model",
 584                                   "cpu model detected in node",
 585                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
 586                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 587                                   &orte_local_cpu_model);
 588 
 589     /* tool communication controls */
 590     orte_report_events_uri = NULL;
 591     (void) mca_base_var_register ("orte", "orte", NULL, "report_events",
 592                                   "URI to which events are to be reported (default: NULL)",
 593                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 594                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 595                                   &orte_report_events_uri);
 596     if (NULL != orte_report_events_uri) {
 597         orte_report_events = true;
 598     }
 599 
 600     /* barrier control */
 601     orte_do_not_barrier = false;
 602     (void) mca_base_var_register ("orte", "orte", NULL, "do_not_barrier",
 603                                   "Do not barrier in orte_init",
 604                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_INTERNAL,
 605                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 606                                   &orte_do_not_barrier);
 607 
 608     orte_enable_recovery = false;
 609     (void) mca_base_var_register ("orte", "orte", NULL, "enable_recovery",
 610                                   "Enable recovery from process failure [Default = disabled]",
 611                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 612                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 613                                   &orte_enable_recovery);
 614 
 615     orte_max_restarts = 0;
 616     (void) mca_base_var_register ("orte", "orte", NULL, "max_restarts",
 617                                   "Max number of times to restart a failed process",
 618                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 619                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 620                                   &orte_max_restarts);
 621 
 622     if (!orte_enable_recovery && orte_max_restarts != 0) {
 623         if (ORTE_PROC_IS_HNP) {
 624             opal_output(orte_clean_output,
 625                         "------------------------------------------------------------------\n"
 626                         "The MCA param orte_enable_recovery was not set to true, but\n"
 627                         "a value was provided for the number of restarts:\n\n"
 628                         "Max restarts: %d\n"
 629                         "We are enabling process recovery and continuing execution. To avoid\n"
 630                         "this warning in the future, please set the orte_enable_recovery\n"
 631                         "param to non-zero.\n"
 632                         "------------------------------------------------------------------",
 633                         orte_max_restarts);
 634         }
 635         orte_enable_recovery = true;
 636     }
 637 
 638     orte_abort_non_zero_exit = true;
 639     (void) mca_base_var_register ("orte", "orte", NULL, "abort_on_non_zero_status",
 640                                   "Abort the job if any process returns a non-zero exit status - no restart in such cases",
 641                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 642                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 643                                   &orte_abort_non_zero_exit);
 644 
 645     orte_allowed_exit_without_sync = false;
 646     (void) mca_base_var_register ("orte", "orte", NULL, "allowed_exit_without_sync",
 647                                   "Process exiting without calling finalize will not trigger job termination",
 648                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 649                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 650                                   &orte_allowed_exit_without_sync);
 651 
 652     orte_report_child_jobs_separately = false;
 653     (void) mca_base_var_register ("orte", "orte", NULL, "report_child_jobs_separately",
 654                                   "Return the exit status of the primary job only",
 655                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 656                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 657                                   &orte_report_child_jobs_separately);
 658 
 659 
 660 #if 0
 661     /* XXX -- unused parameter */
 662     mca_base_param_reg_int_name("orte", "child_time_to_exit",
 663                                 "Max time a spawned child job is allowed to run after the primary job has terminated (seconds)",
 664                                 false, false,
 665                                 INT_MAX, &value);
 666     orte_child_time_to_exit.tv_sec = value;
 667     orte_child_time_to_exit.tv_usec = 0;
 668 #endif
 669 
 670     orte_stat_history_size = 1;
 671     (void) mca_base_var_register ("orte", "orte", NULL, "stat_history_size",
 672                                   "Number of stat samples to keep",
 673                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 674                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 675                                   &orte_stat_history_size);
 676 
 677     orte_no_vm = false;
 678     id = mca_base_var_register ("orte", "orte", NULL, "no_vm",
 679                                 "Do not build the VM at start to detect topologies",
 680                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 681                                 OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 682                                 &orte_no_vm);
 683     /* register a synonym for old name */
 684     mca_base_var_register_synonym (id, "orte", "state", "novm", "select", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
 685 
 686     orte_max_vm_size = -1;
 687     (void) mca_base_var_register ("orte", "orte", NULL, "max_vm_size",
 688                                   "Maximum size of virtual machine - used to subdivide allocation",
 689                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 690                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 691                                   &orte_max_vm_size);
 692 
 693     if (opal_hwloc_use_hwthreads_as_cpus) {
 694         orte_set_slots = "hwthreads";
 695     } else {
 696         orte_set_slots = "cores";
 697     }
 698     (void) mca_base_var_register ("orte", "orte", NULL, "set_default_slots",
 699                                   "Set the number of slots on nodes that lack such info to the"
 700                                   " number of specified objects [a number, \"cores\" (default),"
 701                                   " \"numas\", \"sockets\", \"hwthreads\" (default if hwthreads_as_cpus is set),"
 702                                   " or \"none\" to skip this option]",
 703                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 704                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 705                                   &orte_set_slots);
 706 
 707     /* should we display the allocation after determining it? */
 708     orte_display_allocation = false;
 709     id = mca_base_var_register ("orte", "orte", NULL, "display_alloc",
 710                                 "Whether to display the allocation after it is determined",
 711                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 712                                 OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 713                                 &orte_display_allocation);
 714     /* register a synonym for old name -- should we remove this now? */
 715     mca_base_var_register_synonym (id, "orte", "ras", "base", "display_alloc", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
 716 
 717     /* should we display a detailed (developer-quality) version of the allocation after determining it? */
 718     orte_devel_level_output = false;
 719     id = mca_base_var_register ("orte", "orte", NULL, "display_devel_alloc",
 720                                 "Whether to display a developer-detail allocation after it is determined",
 721                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 722                                 OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 723                                 &orte_devel_level_output);
 724     /* register a synonym for old name -- should we remove this now? */
 725     mca_base_var_register_synonym (id, "orte", "ras", "base", "display_devel_alloc", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
 726 
 727     if (orte_devel_level_output) {
 728         orte_display_allocation = true;
 729     }
 730 
 731     /* should we treat any -host directives as "soft" - i.e., desired
 732      * but not required
 733      */
 734     orte_soft_locations = false;
 735     (void) mca_base_var_register ("orte", "orte", NULL, "soft_locations",
 736                                   "Treat -host directives as desired, but not required",
 737                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 738                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 739                                   &orte_soft_locations);
 740 
 741     /* allow specification of the cores to be used by daemons */
 742     orte_daemon_cores = NULL;
 743     (void) mca_base_var_register ("orte", "orte", NULL, "daemon_cores",
 744                                   "Restrict the ORTE daemons (including mpirun) to operate on the specified cores (comma-separated list of ranges)",
 745                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 746                                   OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
 747                                   &orte_daemon_cores);
 748 
 749     /* Amount of time to wait for a stack trace to return from the daemons */
 750     orte_stack_trace_wait_timeout = 30;
 751     (void) mca_base_var_register ("orte", "orte", NULL, "timeout_for_stack_trace",
 752                                   "Seconds to wait for stack traces to return before terminating "
 753                                   "the job (<= 0 wait forever)",
 754                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 755                                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 756                                   &orte_stack_trace_wait_timeout);
 757 
 758     /* register the URI of the UNIVERSAL data server */
 759     orte_data_server_uri = NULL;
 760     (void) mca_base_var_register ("orte", "pmix", NULL, "server_uri",
 761                                   "URI of a session-level keyval server for publish/lookup operations",
 762                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 763                                   OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL,
 764                                   &orte_data_server_uri);
 765 
 766     return ORTE_SUCCESS;
 767 }

/* [<][>][^][v][top][bottom][index][help] */