root/orte/mca/ess/singleton/ess_singleton_module.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rte_init
  2. rte_finalize
  3. set_handler_default
  4. fork_hnp
  5. rte_abort

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2011 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2010      Oracle and/or its affiliates.  All rights reserved.
  14  * Copyright (c) 2011      Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2013-2018 Intel, Inc.  All rights reserved.
  16  * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
  17  *                         reserved.
  18  * Copyright (c) 2016-2017 Research Organization for Information Science
  19  *                         and Technology (RIST). All rights reserved.
  20  * $COPYRIGHT$
  21  *
  22  * Additional copyrights may follow
  23  *
  24  * $HEADER$
  25  *
  26  */
  27 
  28 #include "orte_config.h"
  29 #include "orte/constants.h"
  30 
  31 #include <string.h>
  32 #ifdef HAVE_SYS_TYPES_H
  33 #include <sys/types.h>
  34 #endif
  35 #ifdef HAVE_UNISTD_H
  36 #include <unistd.h>
  37 #endif
  38 #include <signal.h>
  39 #include <errno.h>
  40 
  41 #include "opal/hash_string.h"
  42 #include "opal/util/arch.h"
  43 #include "opal/util/argv.h"
  44 #include "opal/util/opal_environ.h"
  45 #include "opal/util/path.h"
  46 #include "opal/util/timings.h"
  47 #include "opal/runtime/opal_progress_threads.h"
  48 #include "opal/mca/installdirs/installdirs.h"
  49 #include "opal/mca/pmix/base/base.h"
  50 #include "opal/mca/pmix/pmix.h"
  51 
  52 #include "orte/util/show_help.h"
  53 #include "orte/util/proc_info.h"
  54 #include "orte/mca/errmgr/base/base.h"
  55 #include "orte/mca/filem/base/base.h"
  56 #include "orte/mca/plm/base/base.h"
  57 #include "orte/mca/rml/base/rml_contact.h"
  58 #include "orte/mca/state/base/base.h"
  59 #include "orte/util/name_fns.h"
  60 #include "orte/runtime/orte_globals.h"
  61 #include "orte/util/session_dir.h"
  62 #include "orte/util/pre_condition_transports.h"
  63 
  64 #include "orte/mca/ess/ess.h"
  65 #include "orte/mca/ess/base/base.h"
  66 #include "orte/mca/ess/singleton/ess_singleton.h"
  67 
  68 
  69 static int rte_init(void);
  70 static int rte_finalize(void);
  71 static void rte_abort(int status, bool report);
  72 
  73 orte_ess_base_module_t orte_ess_singleton_module = {
  74     rte_init,
  75     rte_finalize,
  76     rte_abort,
  77     NULL /* ft_event */
  78 };
  79 
  80 extern char *orte_ess_singleton_server_uri;
  81 static bool added_transport_keys=false;
  82 static bool added_num_procs = false;
  83 static bool added_app_ctx = false;
  84 static bool added_pmix_envs = false;
  85 static bool progress_thread_running = false;
  86 
  87 static int fork_hnp(void);
  88 
  89 static int rte_init(void)
  90 {
  91     int rc, ret;
  92     char *error = NULL;
  93     int u32, *u32ptr;
  94     uint16_t u16, *u16ptr;
  95     orte_process_name_t name;
  96 
  97     /* run the prolog */
  98     if (ORTE_SUCCESS != (rc = orte_ess_base_std_prolog())) {
  99         ORTE_ERROR_LOG(rc);
 100         return rc;
 101     }
 102     u32ptr = &u32;
 103     u16ptr = &u16;
 104 
 105     if (NULL != mca_ess_singleton_component.server_uri) {
 106         /* we are going to connect to a server HNP */
 107         if (0 == strncmp(mca_ess_singleton_component.server_uri, "file", strlen("file")) ||
 108             0 == strncmp(mca_ess_singleton_component.server_uri, "FILE", strlen("FILE"))) {
 109             char input[1024], *filename;
 110             FILE *fp;
 111 
 112             /* it is a file - get the filename */
 113             filename = strchr(mca_ess_singleton_component.server_uri, ':');
 114             if (NULL == filename) {
 115                 /* filename is not correctly formatted */
 116                 orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true,
 117                                "singleton", mca_ess_singleton_component.server_uri);
 118                 return ORTE_ERROR;
 119             }
 120             ++filename; /* space past the : */
 121 
 122             if (0 >= strlen(filename)) {
 123                 /* they forgot to give us the name! */
 124                 orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true,
 125                                "singleton", mca_ess_singleton_component.server_uri);
 126                 return ORTE_ERROR;
 127             }
 128 
 129             /* open the file and extract the uri */
 130             fp = fopen(filename, "r");
 131             if (NULL == fp) { /* can't find or read file! */
 132                 orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true,
 133                                "singleton", mca_ess_singleton_component.server_uri);
 134                 return ORTE_ERROR;
 135             }
 136             memset(input, 0, 1024);  // initialize the array to ensure a NULL termination
 137             if (NULL == fgets(input, 1023, fp)) {
 138                 /* something malformed about file */
 139                 fclose(fp);
 140                 orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true,
 141                                "singleton", mca_ess_singleton_component.server_uri, "singleton");
 142                 return ORTE_ERROR;
 143             }
 144             fclose(fp);
 145             input[strlen(input)-1] = '\0';  /* remove newline */
 146             orte_process_info.my_hnp_uri = strdup(input);
 147         } else {
 148             orte_process_info.my_hnp_uri = strdup(mca_ess_singleton_component.server_uri);
 149         }
 150         /* save the daemon uri - we will process it later */
 151         orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri);
 152         /* construct our name - we are in their job family, so we know that
 153          * much. However, we cannot know how many other singletons and jobs
 154          * this HNP is running. Oh well - if someone really wants to use this
 155          * option, they can try to figure it out. For now, we'll just assume
 156          * we are the only ones */
 157         ORTE_PROC_MY_NAME->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_PROC_MY_HNP->jobid, 1);
 158         /* obviously, we are vpid=0 for this job */
 159         ORTE_PROC_MY_NAME->vpid = 0;
 160 
 161         /* for convenience, push the pubsub version of this param into the environ */
 162         opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, true, &environ);
 163     } else if (NULL != getenv("SINGULARITY_CONTAINER") ||
 164                mca_ess_singleton_component.isolated) {
 165         /* ensure we use the isolated pmix component */
 166         opal_setenv(OPAL_MCA_PREFIX"pmix", "isolated", true, &environ);
 167     } else {
 168         /* we want to use PMIX_NAMESPACE that will be sent by the hnp as a jobid */
 169         opal_setenv(OPAL_MCA_PREFIX"orte_launch", "1", true, &environ);
 170         /* spawn our very own HNP to support us */
 171         if (ORTE_SUCCESS != (rc = fork_hnp())) {
 172             ORTE_ERROR_LOG(rc);
 173             return rc;
 174         }
 175         /* our name was given to us by the HNP */
 176         opal_setenv(OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ);
 177     }
 178 
 179     /* get an async event base - we use the opal_async one so
 180      * we don't startup extra threads if not needed */
 181     orte_event_base = opal_progress_thread_init(NULL);
 182     progress_thread_running = true;
 183 
 184     /* open and setup pmix */
 185     if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
 186         error = "opening pmix";
 187         goto error;
 188     }
 189     if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
 190         error = "select pmix";
 191         goto error;
 192     }
 193     /* set the event base */
 194     opal_pmix_base_set_evbase(orte_event_base);
 195     /* initialize the selected module */
 196     if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init(NULL)))) {
 197         /* we cannot run */
 198         error = "pmix init";
 199         goto error;
 200     }
 201 
 202     /* pmix.init set our process name down in the OPAL layer,
 203      * so carry it forward here */
 204     ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
 205     ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;
 206     name.jobid = OPAL_PROC_MY_NAME.jobid;
 207     name.vpid = ORTE_VPID_WILDCARD;
 208 
 209     /* get our local rank from PMI */
 210     OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK,
 211                           ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
 212     if (OPAL_SUCCESS != ret) {
 213         error = "getting local rank";
 214         goto error;
 215     }
 216     orte_process_info.my_local_rank = u16;
 217 
 218     /* get our node rank from PMI */
 219     OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK,
 220                           ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16);
 221     if (OPAL_SUCCESS != ret) {
 222         error = "getting node rank";
 223         goto error;
 224     }
 225     orte_process_info.my_node_rank = u16;
 226 
 227     /* get max procs */
 228     OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_MAX_PROCS,
 229                           &name, &u32ptr, OPAL_UINT32);
 230     if (OPAL_SUCCESS != ret) {
 231         error = "getting max procs";
 232         goto error;
 233     }
 234     orte_process_info.max_procs = u32;
 235 
 236     /* we are a singleton, so there is only one proc in the job */
 237     orte_process_info.num_procs = 1;
 238     /* push into the environ for pickup in MPI layer for
 239      * MPI-3 required info key
 240      */
 241     if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) {
 242         char * num_procs;
 243         opal_asprintf(&num_procs, "%d", orte_process_info.num_procs);
 244         opal_setenv(OPAL_MCA_PREFIX"orte_ess_num_procs", num_procs, true, &environ);
 245         free(num_procs);
 246         added_num_procs = true;
 247     }
 248     if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) {
 249         char * num_procs;
 250         opal_asprintf(&num_procs, "%d", orte_process_info.num_procs);
 251         opal_setenv("OMPI_APP_CTX_NUM_PROCS", num_procs, true, &environ);
 252         free(num_procs);
 253         added_app_ctx = true;
 254     }
 255 
 256 
 257     /* get our app number from PMI - ok if not found */
 258     OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_APPNUM,
 259                           ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
 260     if (OPAL_SUCCESS == ret) {
 261         orte_process_info.app_num = u32;
 262     } else {
 263         orte_process_info.app_num = 0;
 264     }
 265     /* set some other standard values */
 266     orte_process_info.num_local_peers = 0;
 267 
 268     /* setup transport keys in case the MPI layer needs them -
 269      * we can use the jobfam and stepid as unique keys
 270      * because they are unique values assigned by the RM
 271      */
 272     if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) {
 273         char *key;
 274         ret = orte_pre_condition_transports(NULL, &key);
 275         if (ORTE_SUCCESS == ret) {
 276             opal_setenv(OPAL_MCA_PREFIX"orte_precondition_transports", key, true, &environ);
 277             free(key);
 278         }
 279     }
 280 
 281     /* now that we have all required info, complete the setup */
 282     /*
 283      * stdout/stderr buffering
 284      * If the user requested to override the default setting then do
 285      * as they wish.
 286      */
 287     if( orte_ess_base_std_buffering > -1 ) {
 288         if( 0 == orte_ess_base_std_buffering ) {
 289             setvbuf(stdout, NULL, _IONBF, 0);
 290             setvbuf(stderr, NULL, _IONBF, 0);
 291         }
 292         else if( 1 == orte_ess_base_std_buffering ) {
 293             setvbuf(stdout, NULL, _IOLBF, 0);
 294             setvbuf(stderr, NULL, _IOLBF, 0);
 295         }
 296         else if( 2 == orte_ess_base_std_buffering ) {
 297             setvbuf(stdout, NULL, _IOFBF, 0);
 298             setvbuf(stderr, NULL, _IOFBF, 0);
 299         }
 300     }
 301 
 302     /* if I am an MPI app, we will let the MPI layer define and
 303      * control the opal_proc_t structure. Otherwise, we need to
 304      * do so here */
 305     if (ORTE_PROC_NON_MPI) {
 306         orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
 307         orte_process_info.super.proc_hostname = orte_process_info.nodename;
 308         orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
 309         orte_process_info.super.proc_arch = opal_local_arch;
 310         opal_proc_local_set(&orte_process_info.super);
 311     }
 312 
 313     /* open and setup the state machine */
 314     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
 315         ORTE_ERROR_LOG(ret);
 316         error = "orte_state_base_open";
 317         goto error;
 318     }
 319     if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
 320         ORTE_ERROR_LOG(ret);
 321         error = "orte_state_base_select";
 322         goto error;
 323     }
 324 
 325     /* open the errmgr */
 326     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
 327         ORTE_ERROR_LOG(ret);
 328         error = "orte_errmgr_base_open";
 329         goto error;
 330     }
 331 
 332     /* setup my session directory */
 333     if (orte_create_session_dirs) {
 334         OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
 335                              "%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
 336                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 337                              (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
 338                              orte_process_info.nodename));
 339         if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
 340             ORTE_ERROR_LOG(ret);
 341             error = "orte_session_dir";
 342             goto error;
 343         }
 344         /* Once the session directory location has been established, set
 345            the opal_output env file location to be in the
 346            proc-specific session directory. */
 347         opal_output_set_output_file_info(orte_process_info.proc_session_dir,
 348                                          "output-", NULL, NULL);
 349         /* register the directory for cleanup */
 350         if (NULL != opal_pmix.register_cleanup) {
 351             if (orte_standalone_operation) {
 352                 if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.top_session_dir, true, false, true))) {
 353                     ORTE_ERROR_LOG(ret);
 354                     error = "register cleanup";
 355                     goto error;
 356                 }
 357             } else {
 358                 if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.job_session_dir, true, false, false))) {
 359                     ORTE_ERROR_LOG(ret);
 360                     error = "register cleanup";
 361                     goto error;
 362                 }
 363             }
 364         }
 365     }
 366 
 367     /* if we have info on the HNP and local daemon, process it */
 368     if (NULL != orte_process_info.my_hnp_uri) {
 369         /* we have to set the HNP's name, even though we won't route messages directly
 370          * to it. This is required to ensure that we -do- send messages to the correct
 371          * HNP name
 372          */
 373         if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
 374                                                             ORTE_PROC_MY_HNP, NULL))) {
 375             ORTE_ERROR_LOG(ret);
 376             error = "orte_rml_parse_HNP";
 377             goto error;
 378         }
 379     }
 380     if (NULL != orte_process_info.my_daemon_uri) {
 381         opal_value_t val;
 382 
 383         /* extract the daemon's name so we can update the routing table */
 384         if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
 385                                                             ORTE_PROC_MY_DAEMON, NULL))) {
 386             ORTE_ERROR_LOG(ret);
 387             error = "orte_rml_parse_daemon";
 388             goto error;
 389         }
 390         /* Set the contact info in the database - this won't actually establish
 391          * the connection, but just tells us how to reach the daemon
 392          * if/when we attempt to send to it
 393          */
 394         OBJ_CONSTRUCT(&val, opal_value_t);
 395         val.key = OPAL_PMIX_PROC_URI;
 396         val.type = OPAL_STRING;
 397         val.data.string = orte_process_info.my_daemon_uri;
 398         if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_DAEMON, &val))) {
 399             ORTE_ERROR_LOG(ret);
 400             val.key = NULL;
 401             val.data.string = NULL;
 402             OBJ_DESTRUCT(&val);
 403             error = "store DAEMON URI";
 404             goto error;
 405         }
 406         val.key = NULL;
 407         val.data.string = NULL;
 408         OBJ_DESTRUCT(&val);
 409     }
 410 
 411     /* setup the errmgr */
 412     if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
 413         ORTE_ERROR_LOG(ret);
 414         error = "orte_errmgr_base_select";
 415         goto error;
 416     }
 417 
 418     /* setup process binding */
 419     if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) {
 420         error = "proc_binding";
 421         goto error;
 422     }
 423 
 424     /* this needs to be set to enable debugger use when direct launched */
 425     if (NULL == orte_process_info.my_daemon_uri) {
 426         orte_standalone_operation = true;
 427     }
 428 
 429     /* set max procs */
 430     if (orte_process_info.max_procs < orte_process_info.num_procs) {
 431         orte_process_info.max_procs = orte_process_info.num_procs;
 432     }
 433 
 434     /* push our hostname so others can find us, if they need to - the
 435      * native PMIx component will ignore this request as the hostname
 436      * is provided by the system */
 437     OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
 438     if (ORTE_SUCCESS != ret) {
 439         error = "db store hostname";
 440         goto error;
 441     }
 442 
 443     /* if we are an ORTE app - and not an MPI app - then
 444      * we need to exchange our connection info here.
 445      * MPI_Init has its own modex, so we don't need to do
 446      * two of them. However, if we don't do a modex at all,
 447      * then processes have no way to communicate
 448      *
 449      * NOTE: only do this when the process originally launches.
 450      * Cannot do this on a restart as the rest of the processes
 451      * in the job won't be executing this step, so we would hang
 452      */
 453     if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
 454         /* need to commit the data before we fence */
 455         opal_pmix.commit();
 456         if (ORTE_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
 457             error = "opal_pmix.fence() failed";
 458             goto error;
 459         }
 460     }
 461 
 462     return ORTE_SUCCESS;
 463 
 464   error:
 465     if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
 466         orte_show_help("help-orte-runtime.txt",
 467                        "orte_init:startup:internal-failure",
 468                        true, error, ORTE_ERROR_NAME(ret), ret);
 469     }
 470     return ret;
 471 }
 472 
 473 static int rte_finalize(void)
 474 {
 475     /* remove the envars that we pushed into environ
 476      * so we leave that structure intact
 477      */
 478     if (added_transport_keys) {
 479         unsetenv(OPAL_MCA_PREFIX"orte_precondition_transports");
 480     }
 481     if (added_num_procs) {
 482         unsetenv(OPAL_MCA_PREFIX"orte_ess_num_procs");
 483     }
 484     if (added_app_ctx) {
 485         unsetenv("OMPI_APP_CTX_NUM_PROCS");
 486     }
 487     if (added_pmix_envs) {
 488         unsetenv("PMIX_NAMESPACE");
 489         unsetenv("PMIX_RANK");
 490         unsetenv("PMIX_SERVER_URI");
 491         unsetenv("PMIX_SECURITY_MODE");
 492     }
 493     /* close frameworks */
 494     (void) mca_base_framework_close(&orte_filem_base_framework);
 495     (void) mca_base_framework_close(&orte_errmgr_base_framework);
 496 
 497     /* mark us as finalized */
 498     if (NULL != opal_pmix.finalize) {
 499         opal_pmix.finalize();
 500         (void) mca_base_framework_close(&opal_pmix_base_framework);
 501     }
 502 
 503     (void) mca_base_framework_close(&orte_state_base_framework);
 504     orte_session_dir_finalize(ORTE_PROC_MY_NAME);
 505 
 506     /* cleanup the process info */
 507     orte_proc_info_finalize();
 508 
 509     /* release the event base */
 510     if (progress_thread_running) {
 511         opal_progress_thread_finalize(NULL);
 512         progress_thread_running = false;
 513     }
 514     return ORTE_SUCCESS;
 515 }
 516 
 517 #define ORTE_URI_MSG_LGTH   256
 518 
 519 static void set_handler_default(int sig)
 520 {
 521     struct sigaction act;
 522 
 523     act.sa_handler = SIG_DFL;
 524     act.sa_flags = 0;
 525     sigemptyset(&act.sa_mask);
 526 
 527     sigaction(sig, &act, (struct sigaction *)0);
 528 }
 529 
 530 static int fork_hnp(void)
 531 {
 532     int p[2], death_pipe[2];
 533     char *cmd;
 534     char **argv = NULL;
 535     int argc;
 536     char *param, *cptr;
 537     sigset_t sigs;
 538     int buffer_length, num_chars_read, chunk;
 539     char *orted_uri;
 540     int rc, i;
 541 
 542     /* A pipe is used to communicate between the parent and child to
 543        indicate whether the exec ultimately succeeded or failed.  The
 544        child sets the pipe to be close-on-exec; the child only ever
 545        writes anything to the pipe if there is an error (e.g.,
 546        executable not found, exec() fails, etc.).  The parent does a
 547        blocking read on the pipe; if the pipe closed with no data,
 548        then the exec() succeeded.  If the parent reads something from
 549        the pipe, then the child was letting us know that it failed.
 550     */
 551     if (pipe(p) < 0) {
 552         ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
 553         return ORTE_ERR_SYS_LIMITS_PIPES;
 554     }
 555 
 556     /* we also have to give the HNP a pipe it can watch to know when
 557      * we terminated. Since the HNP is going to be a child of us, it
 558      * can't just use waitpid to see when we leave - so it will watch
 559      * the pipe instead
 560      */
 561     if (pipe(death_pipe) < 0) {
 562         ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
 563         return ORTE_ERR_SYS_LIMITS_PIPES;
 564     }
 565 
 566     /* find the orted binary using the install_dirs support - this also
 567      * checks to ensure that we can see this executable and it *is* executable by us
 568      */
 569     cmd = opal_path_access("orted", opal_install_dirs.bindir, X_OK);
 570     if (NULL == cmd) {
 571         /* guess we couldn't do it - best to abort */
 572         ORTE_ERROR_LOG(ORTE_ERR_FILE_NOT_EXECUTABLE);
 573         close(p[0]);
 574         close(p[1]);
 575         return ORTE_ERR_FILE_NOT_EXECUTABLE;
 576     }
 577 
 578     /* okay, setup an appropriate argv */
 579     opal_argv_append(&argc, &argv, "orted");
 580 
 581     /* tell the daemon it is to be the HNP */
 582     opal_argv_append(&argc, &argv, "--hnp");
 583 
 584     /* tell the daemon to get out of our process group */
 585     opal_argv_append(&argc, &argv, "--set-sid");
 586 
 587     /* tell the daemon to report back its uri so we can connect to it */
 588     opal_argv_append(&argc, &argv, "--report-uri");
 589     opal_asprintf(&param, "%d", p[1]);
 590     opal_argv_append(&argc, &argv, param);
 591     free(param);
 592 
 593     /* give the daemon a pipe it can watch to tell when we have died */
 594     opal_argv_append(&argc, &argv, "--singleton-died-pipe");
 595     opal_asprintf(&param, "%d", death_pipe[0]);
 596     opal_argv_append(&argc, &argv, param);
 597     free(param);
 598 
 599     /* add any debug flags */
 600     if (orte_debug_flag) {
 601         opal_argv_append(&argc, &argv, "--debug");
 602     }
 603 
 604     if (orte_debug_daemons_flag) {
 605         opal_argv_append(&argc, &argv, "--debug-daemons");
 606     }
 607 
 608     if (orte_debug_daemons_file_flag) {
 609         if (!orte_debug_daemons_flag) {
 610             opal_argv_append(&argc, &argv, "--debug-daemons");
 611         }
 612         opal_argv_append(&argc, &argv, "--debug-daemons-file");
 613     }
 614 
 615     /* indicate that it must use the novm state machine */
 616     opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
 617     opal_argv_append(&argc, &argv, "state_novm_select");
 618     opal_argv_append(&argc, &argv, "1");
 619 
 620     /* direct the selection of the ess component */
 621     opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
 622     opal_argv_append(&argc, &argv, "ess");
 623     opal_argv_append(&argc, &argv, "hnp");
 624 
 625     /* direct the selection of the pmix component */
 626     opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
 627     opal_argv_append(&argc, &argv, "pmix");
 628     opal_argv_append(&argc, &argv, "^s1,s2,cray,isolated");
 629 
 630     /* Fork off the child */
 631     orte_process_info.hnp_pid = fork();
 632     if(orte_process_info.hnp_pid < 0) {
 633         ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN);
 634         close(p[0]);
 635         close(p[1]);
 636         close(death_pipe[0]);
 637         close(death_pipe[1]);
 638         free(cmd);
 639         opal_argv_free(argv);
 640         return ORTE_ERR_SYS_LIMITS_CHILDREN;
 641     }
 642 
 643     if (orte_process_info.hnp_pid == 0) {
 644         close(p[0]);
 645         close(death_pipe[1]);
 646         /* I am the child - exec me */
 647 
 648         /* Set signal handlers back to the default.  Do this close
 649            to the execve() because the event library may (and likely
 650            will) reset them.  If we don't do this, the event
 651            library may have left some set that, at least on some
 652            OS's, don't get reset via fork() or exec().  Hence, the
 653            orted could be unkillable (for example). */
 654         set_handler_default(SIGTERM);
 655         set_handler_default(SIGINT);
 656         set_handler_default(SIGHUP);
 657         set_handler_default(SIGPIPE);
 658         set_handler_default(SIGCHLD);
 659 
 660         /* Unblock all signals, for many of the same reasons that
 661            we set the default handlers, above.  This is noticable
 662            on Linux where the event library blocks SIGTERM, but we
 663            don't want that blocked by the orted (or, more
 664            specifically, we don't want it to be blocked by the
 665            orted and then inherited by the ORTE processes that it
 666            forks, making them unkillable by SIGTERM). */
 667         sigprocmask(0, 0, &sigs);
 668         sigprocmask(SIG_UNBLOCK, &sigs, 0);
 669 
 670         execv(cmd, argv);
 671 
 672         /* if I get here, the execv failed! */
 673         orte_show_help("help-ess-base.txt", "ess-base:execv-error",
 674                        true, cmd, strerror(errno));
 675         exit(1);
 676 
 677     } else {
 678         int count;
 679 
 680         free(cmd);
 681         /* I am the parent - wait to hear something back and
 682          * report results
 683          */
 684         close(p[1]);  /* parent closes the write - orted will write its contact info to it*/
 685         close(death_pipe[0]);  /* parent closes the death_pipe's read */
 686         opal_argv_free(argv);
 687 
 688         /* setup the buffer to read the HNP's uri */
 689         buffer_length = ORTE_URI_MSG_LGTH;
 690         chunk = ORTE_URI_MSG_LGTH-1;
 691         num_chars_read = 0;
 692         orted_uri = (char*)malloc(buffer_length);
 693         memset(orted_uri, 0, buffer_length);
 694 
 695         while (0 != (rc = read(p[0], &orted_uri[num_chars_read], chunk))) {
 696             if (rc < 0 && (EAGAIN == errno || EINTR == errno)) {
 697                 continue;
 698             } else if (rc < 0) {
 699                 num_chars_read = -1;
 700                 break;
 701             }
 702             /* we read something - better get more */
 703             num_chars_read += rc;
 704             chunk -= rc;
 705             if (0 == chunk) {
 706                 chunk = ORTE_URI_MSG_LGTH;
 707                 orted_uri = realloc((void*)orted_uri, buffer_length+chunk);
 708                 memset(&orted_uri[buffer_length], 0, chunk);
 709                 buffer_length += chunk;
 710             }
 711         }
 712         close(p[0]);
 713 
 714         if (num_chars_read <= 0) {
 715             /* we didn't get anything back - this is bad */
 716             ORTE_ERROR_LOG(ORTE_ERR_HNP_COULD_NOT_START);
 717             free(orted_uri);
 718             return ORTE_ERR_HNP_COULD_NOT_START;
 719         }
 720 
 721         /* parse the sysinfo from the returned info - must
 722          * start from the end of the string as the uri itself
 723          * can contain brackets */
 724         if (NULL == (param = strrchr(orted_uri, '['))) {
 725             ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
 726             free(orted_uri);
 727             return ORTE_ERR_COMM_FAILURE;
 728         }
 729         *param = '\0'; /* terminate the uri string */
 730         ++param;  /* point to the start of the sysinfo */
 731 
 732         /* find the end of the sysinfo */
 733         if (NULL == (cptr = strchr(param, ']'))) {
 734             ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
 735             free(orted_uri);
 736             return ORTE_ERR_COMM_FAILURE;
 737         }
 738         *cptr = '\0';  /* terminate the sysinfo string */
 739         ++cptr;  /* point to the start of the pmix uri */
 740 
 741         /* convert the sysinfo string */
 742         if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_sysinfo(&orte_local_cpu_type,
 743                                       &orte_local_cpu_model, param))) {
 744             ORTE_ERROR_LOG(rc);
 745             free(orted_uri);
 746             return rc;
 747         }
 748 
 749         /* save the daemon uri - we will process it later */
 750         orte_process_info.my_daemon_uri = strdup(orted_uri);
 751         /* likewise, since this is also the HNP, set that uri too */
 752         orte_process_info.my_hnp_uri = orted_uri;
 753 
 754         /* split the pmix_uri into its parts */
 755         argv = opal_argv_split(cptr, '*');
 756         count = opal_argv_count(argv);
 757         /* push each piece into the environment */
 758         for (i=0; i < count; i++) {
 759             char *c = strchr(argv[i], '=');
 760             assert(NULL != c);
 761             *c++ = '\0';
 762             opal_setenv(argv[i], c, true, &environ);
 763         }
 764         opal_argv_free(argv);
 765         added_pmix_envs = true;
 766 
 767         /* all done - report success */
 768         return ORTE_SUCCESS;
 769     }
 770 }
 771 
 772 static void rte_abort(int status, bool report)
 773 {
 774     struct timespec tp = {0, 100000};
 775 
 776     OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
 777                          "%s ess:singleton:abort: abort with status %d",
 778                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 779                          status));
 780 
 781     /* PMI doesn't like NULL messages, but our interface
 782      * doesn't provide one - so rig one up here
 783      */
 784     opal_pmix.abort(status, "N/A", NULL);
 785 
 786     /* provide a little delay for the PMIx thread to
 787      * get the info out */
 788     nanosleep(&tp, NULL);
 789 
 790     /* Now Exit */
 791     _exit(status);
 792 }

/* [<][>][^][v][top][bottom][index][help] */