root/orte/mca/ess/base/ess_base_std_orted.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. setup_sighandler
  2. orte_ess_base_orted_setup
  3. orte_ess_base_orted_finalize
  4. shutdown_signal
  5. epipe_signal_callback
  6. signal_forward_callback

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2009      Institut National de Recherche en Informatique
  13  *                         et Automatique. All rights reserved.
  14  * Copyright (c) 2011      Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.  All rights
  16  *                         reserved.
  17  * Copyright (c) 2013-2019 Intel, Inc.  All rights reserved.
  18  * Copyright (c) 2017      IBM Corporation. All rights reserved.
  19  * $COPYRIGHT$
  20  *
  21  * Additional copyrights may follow
  22  *
  23  * $HEADER$
  24  */
  25 
  26 #include "orte_config.h"
  27 #include "orte/constants.h"
  28 
  29 #include <sys/types.h>
  30 #include <stdio.h>
  31 #ifdef HAVE_FCNTL_H
  32 #include <fcntl.h>
  33 #endif
  34 #ifdef HAVE_UNISTD_H
  35 #include <unistd.h>
  36 #endif
  37 
  38 #include "opal/dss/dss.h"
  39 #include "opal/mca/event/event.h"
  40 #include "opal/runtime/opal.h"
  41 #include "opal/mca/hwloc/base/base.h"
  42 #include "opal/mca/pmix/base/base.h"
  43 #include "opal/mca/pstat/base/base.h"
  44 #include "opal/util/arch.h"
  45 #include "opal/util/opal_environ.h"
  46 #include "opal/util/os_path.h"
  47 #include "opal/util/proc.h"
  48 
  49 #include "orte/mca/rtc/base/base.h"
  50 #include "orte/mca/rml/base/base.h"
  51 #include "orte/mca/rml/base/rml_contact.h"
  52 #include "orte/mca/routed/base/base.h"
  53 #include "orte/mca/routed/routed.h"
  54 #include "orte/mca/oob/base/base.h"
  55 #include "orte/mca/grpcomm/grpcomm.h"
  56 #include "orte/mca/grpcomm/base/base.h"
  57 #include "orte/mca/iof/base/base.h"
  58 #include "orte/mca/plm/base/base.h"
  59 #include "orte/mca/odls/base/base.h"
  60 #include "orte/mca/errmgr/errmgr.h"
  61 #include "orte/mca/rmaps/base/base.h"
  62 #include "orte/mca/filem/base/base.h"
  63 #include "orte/util/proc_info.h"
  64 #include "orte/util/session_dir.h"
  65 #include "orte/util/name_fns.h"
  66 #include "orte/util/show_help.h"
  67 #include "orte/mca/errmgr/base/base.h"
  68 #include "orte/mca/state/base/base.h"
  69 #include "orte/mca/state/state.h"
  70 #include "orte/runtime/orte_wait.h"
  71 #include "orte/runtime/orte_globals.h"
  72 #include "orte/runtime/orte_quit.h"
  73 #include "orte/orted/pmix/pmix_server.h"
  74 
  75 #include "orte/mca/ess/base/base.h"
  76 
  77 /* local globals */
  78 static bool plm_in_use=false;
  79 static bool signals_set=false;
  80 static opal_event_t term_handler;
  81 static opal_event_t int_handler;
  82 static opal_event_t epipe_handler;
  83 static char *log_path = NULL;
  84 static void shutdown_signal(int fd, short flags, void *arg);
  85 static void epipe_signal_callback(int fd, short flags, void *arg);
  86 static void signal_forward_callback(int fd, short event, void *arg);
  87 static opal_event_t *forward_signals_events = NULL;
  88 
  89 static void setup_sighandler(int signal, opal_event_t *ev,
  90                              opal_event_cbfunc_t cbfunc)
  91 {
  92     opal_event_signal_set(orte_event_base, ev, signal, cbfunc, ev);
  93     opal_event_set_priority(ev, ORTE_ERROR_PRI);
  94     opal_event_signal_add(ev, NULL);
  95 }
  96 
  97 
  98 int orte_ess_base_orted_setup(void)
  99 {
 100     int ret = ORTE_ERROR;
 101     int fd;
 102     char log_file[PATH_MAX];
 103     char *jobidstring;
 104     char *error = NULL;
 105     orte_job_t *jdata;
 106     orte_proc_t *proc;
 107     orte_app_context_t *app;
 108     char *param;
 109     hwloc_obj_t obj;
 110     unsigned i, j;
 111     orte_topology_t *t;
 112     orte_ess_base_signal_t *sig;
 113     int idx;
 114 
 115     /* my name is set, xfer it to the OPAL layer */
 116     orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
 117     orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
 118     orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
 119     orte_process_info.super.proc_arch = opal_local_arch;
 120     opal_proc_local_set(&orte_process_info.super);
 121 
 122     plm_in_use = false;
 123 
 124     /* setup callback for SIGPIPE */
 125     setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback);
 126     /* Set signal handlers to catch kill signals so we can properly clean up
 127      * after ourselves.
 128      */
 129     setup_sighandler(SIGTERM, &term_handler, shutdown_signal);
 130     setup_sighandler(SIGINT, &int_handler, shutdown_signal);
 131     /** setup callbacks for signals we should forward */
 132     if (0 < (idx = opal_list_get_size(&orte_ess_base_signals))) {
 133         forward_signals_events = (opal_event_t*)malloc(sizeof(opal_event_t) * idx);
 134         if (NULL == forward_signals_events) {
 135             ret = ORTE_ERR_OUT_OF_RESOURCE;
 136             error = "unable to malloc";
 137             goto error;
 138         }
 139         idx = 0;
 140         OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) {
 141             setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback);
 142             ++idx;
 143         }
 144     }
 145     signals_set = true;
 146 
 147 
 148     /* get the local topology */
 149     if (NULL == opal_hwloc_topology) {
 150         if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) {
 151             error = "topology discovery";
 152             goto error;
 153         }
 154     }
 155     /* generate the signature */
 156     orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology);
 157     /* remove the hostname from the topology. Unfortunately, hwloc
 158      * decided to add the source hostname to the "topology", thus
 159      * rendering it unusable as a pure topological description. So
 160      * we remove that information here.
 161      */
 162     obj = hwloc_get_root_obj(opal_hwloc_topology);
 163     for (i=0; i < obj->infos_count; i++) {
 164         if (NULL == obj->infos[i].name ||
 165             NULL == obj->infos[i].value) {
 166             continue;
 167         }
 168         if (0 == strncmp(obj->infos[i].name, "HostName", strlen("HostName"))) {
 169             free(obj->infos[i].name);
 170             free(obj->infos[i].value);
 171             /* left justify the array */
 172             for (j=i; j < obj->infos_count-1; j++) {
 173                 obj->infos[j] = obj->infos[j+1];
 174             }
 175             obj->infos[obj->infos_count-1].name = NULL;
 176             obj->infos[obj->infos_count-1].value = NULL;
 177             obj->infos_count--;
 178             break;
 179         }
 180     }
 181     if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
 182         opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
 183         opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO);
 184     }
 185 
 186     /* open and setup the opal_pstat framework so we can provide
 187      * process stats if requested
 188      */
 189     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_pstat_base_framework, 0))) {
 190         ORTE_ERROR_LOG(ret);
 191         error = "opal_pstat_base_open";
 192         goto error;
 193     }
 194     if (ORTE_SUCCESS != (ret = opal_pstat_base_select())) {
 195         ORTE_ERROR_LOG(ret);
 196         error = "opal_pstat_base_select";
 197         goto error;
 198     }
 199 
 200     /* define the HNP name */
 201     ORTE_PROC_MY_HNP->jobid = ORTE_PROC_MY_NAME->jobid;
 202     ORTE_PROC_MY_HNP->vpid = 0;
 203 
 204     /* open and setup the state machine */
 205     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
 206         ORTE_ERROR_LOG(ret);
 207         error = "orte_state_base_open";
 208         goto error;
 209     }
 210     if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
 211         ORTE_ERROR_LOG(ret);
 212         error = "orte_state_base_select";
 213         goto error;
 214     }
 215     /* open the errmgr */
 216     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
 217         ORTE_ERROR_LOG(ret);
 218         error = "orte_errmgr_base_open";
 219         goto error;
 220     }
 221     /* some environments allow remote launches - e.g., ssh - so
 222      * open and select something -only- if we are given
 223      * a specific module to use
 224      */
 225     (void) mca_base_var_env_name("plm", &param);
 226     plm_in_use = !!(getenv(param));
 227     free (param);
 228     if (plm_in_use)  {
 229         if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) {
 230             ORTE_ERROR_LOG(ret);
 231             error = "orte_plm_base_open";
 232             goto error;
 233         }
 234         if (ORTE_SUCCESS != (ret = orte_plm_base_select())) {
 235             ORTE_ERROR_LOG(ret);
 236             error = "orte_plm_base_select";
 237             goto error;
 238         }
 239     }
 240     /* setup my session directory here as the OOB may need it */
 241     if (orte_create_session_dirs) {
 242         OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
 243                              "%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
 244                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 245                              (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
 246                              orte_process_info.nodename));
 247 
 248         /* take a pass thru the session directory code to fillin the
 249          * tmpdir names - don't create anything yet
 250          */
 251         if (ORTE_SUCCESS != (ret = orte_session_dir(false, ORTE_PROC_MY_NAME))) {
 252             ORTE_ERROR_LOG(ret);
 253             error = "orte_session_dir define";
 254             goto error;
 255         }
 256         /* clear the session directory just in case there are
 257          * stale directories laying around
 258          */
 259         orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 260         /* now actually create the directory tree */
 261         if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
 262             ORTE_ERROR_LOG(ret);
 263             error = "orte_session_dir";
 264             goto error;
 265         }
 266         /* set the opal_output env file location to be in the
 267          * proc-specific session directory. */
 268         opal_output_set_output_file_info(orte_process_info.proc_session_dir,
 269                                          "output-", NULL, NULL);
 270         /* setup stdout/stderr */
 271         if (orte_debug_daemons_file_flag) {
 272             /* if we are debugging to a file, then send stdout/stderr to
 273              * the orted log file
 274              */
 275             /* get my jobid */
 276             if (ORTE_SUCCESS != (ret = orte_util_convert_jobid_to_string(&jobidstring,
 277                                                                          ORTE_PROC_MY_NAME->jobid))) {
 278                 ORTE_ERROR_LOG(ret);
 279                 error = "convert_jobid";
 280                 goto error;
 281             }
 282             /* define a log file name in the session directory */
 283             snprintf(log_file, PATH_MAX, "output-orted-%s-%s.log",
 284                      jobidstring, orte_process_info.nodename);
 285             log_path = opal_os_path(false, orte_process_info.top_session_dir,
 286                                     log_file, NULL);
 287 
 288             fd = open(log_path, O_RDWR|O_CREAT|O_TRUNC, 0640);
 289             if (fd < 0) {
 290                 /* couldn't open the file for some reason, so
 291                  * just connect everything to /dev/null
 292                  */
 293                 fd = open("/dev/null", O_RDWR|O_CREAT|O_TRUNC, 0666);
 294             } else {
 295                 dup2(fd, STDOUT_FILENO);
 296                 dup2(fd, STDERR_FILENO);
 297                 if(fd != STDOUT_FILENO && fd != STDERR_FILENO) {
 298                     close(fd);
 299                 }
 300             }
 301         }
 302     }
 303     /* setup the global job and node arrays */
 304     orte_job_data = OBJ_NEW(opal_hash_table_t);
 305     if (ORTE_SUCCESS != (ret = opal_hash_table_init(orte_job_data, 128))) {
 306         ORTE_ERROR_LOG(ret);
 307         error = "setup job array";
 308         goto error;
 309     }
 310     orte_node_pool = OBJ_NEW(opal_pointer_array_t);
 311     if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool,
 312                                ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
 313                                ORTE_GLOBAL_ARRAY_MAX_SIZE,
 314                                ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
 315         ORTE_ERROR_LOG(ret);
 316         error = "setup node array";
 317         goto error;
 318     }
 319     orte_node_topologies = OBJ_NEW(opal_pointer_array_t);
 320     if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies,
 321                                ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
 322                                ORTE_GLOBAL_ARRAY_MAX_SIZE,
 323                                ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
 324         ORTE_ERROR_LOG(ret);
 325         error = "setup node topologies array";
 326         goto error;
 327     }
 328     /* Setup the job data object for the daemons */
 329     /* create and store the job data object */
 330     jdata = OBJ_NEW(orte_job_t);
 331     jdata->jobid = ORTE_PROC_MY_NAME->jobid;
 332     opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
 333     /* every job requires at least one app */
 334     app = OBJ_NEW(orte_app_context_t);
 335     opal_pointer_array_set_item(jdata->apps, 0, app);
 336     jdata->num_apps++;
 337 
 338     /* create and store a proc object for us */
 339     proc = OBJ_NEW(orte_proc_t);
 340     proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
 341     proc->name.vpid = ORTE_PROC_MY_NAME->vpid;
 342     proc->pid = orte_process_info.pid;
 343     proc->state = ORTE_PROC_STATE_RUNNING;
 344     opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc);
 345     /* record that the daemon job is running */
 346     jdata->num_procs = 1;
 347     jdata->state = ORTE_JOB_STATE_RUNNING;
 348     /* obviously, we have "reported" */
 349     jdata->num_reported = 1;
 350 
 351     /* setup the PMIx framework - ensure it skips all non-PMIx components,
 352      * but do not override anything we were given */
 353     opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
 354     if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
 355         ORTE_ERROR_LOG(ret);
 356         error = "orte_pmix_base_open";
 357         goto error;
 358     }
 359     if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) {
 360         ORTE_ERROR_LOG(ret);
 361         error = "opal_pmix_base_select";
 362         goto error;
 363     }
 364     /* set the event base */
 365     opal_pmix_base_set_evbase(orte_event_base);
 366     /* setup the PMIx server - we need this here in case the
 367      * communications infrastructure wants to register
 368      * information */
 369     if (ORTE_SUCCESS != (ret = pmix_server_init())) {
 370         /* the server code already barked, so let's be quiet */
 371         ret = ORTE_ERR_SILENT;
 372         error = "pmix_server_init";
 373         goto error;
 374     }
 375 
 376     /* Setup the communication infrastructure */
 377     /* Routed system */
 378     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
 379         ORTE_ERROR_LOG(ret);
 380         error = "orte_routed_base_open";
 381         goto error;
 382     }
 383     if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
 384         ORTE_ERROR_LOG(ret);
 385         error = "orte_routed_base_select";
 386         goto error;
 387     }
 388     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
 389         ORTE_ERROR_LOG(ret);
 390         error = "orte_oob_base_open";
 391         goto error;
 392     }
 393     if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
 394         ORTE_ERROR_LOG(ret);
 395         error = "orte_oob_base_select";
 396         goto error;
 397     }
 398     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
 399         ORTE_ERROR_LOG(ret);
 400         error = "orte_rml_base_open";
 401         goto error;
 402     }
 403     if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
 404         ORTE_ERROR_LOG(ret);
 405         error = "orte_rml_base_select";
 406         goto error;
 407     }
 408 
 409     /* it is now safe to start the pmix server */
 410     pmix_server_start();
 411 
 412     if (NULL != orte_process_info.my_hnp_uri) {
 413         opal_value_t val;
 414 
 415         /* extract the HNP's name so we can update the routing table */
 416         if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
 417                                                             ORTE_PROC_MY_HNP, NULL))) {
 418             ORTE_ERROR_LOG(ret);
 419             error = "orte_rml_parse_HNP";
 420             goto error;
 421         }
 422         /* Set the contact info in the RML - this won't actually establish
 423          * the connection, but just tells the RML how to reach the HNP
 424          * if/when we attempt to send to it
 425          */
 426         OBJ_CONSTRUCT(&val, opal_value_t);
 427         val.key = OPAL_PMIX_PROC_URI;
 428         val.type = OPAL_STRING;
 429         val.data.string = orte_process_info.my_hnp_uri;
 430         if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_HNP, &val))) {
 431             ORTE_ERROR_LOG(ret);
 432             val.key = NULL;
 433             val.data.string = NULL;
 434             OBJ_DESTRUCT(&val);
 435             error = "store HNP URI";
 436             goto error;
 437         }
 438         val.key = NULL;
 439         val.data.string = NULL;
 440         OBJ_DESTRUCT(&val);
 441     }
 442 
 443     /* select the errmgr */
 444     if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
 445         ORTE_ERROR_LOG(ret);
 446         error = "orte_errmgr_base_select";
 447         goto error;
 448     }
 449 
 450     /*
 451      * Group communications
 452      */
 453     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
 454         ORTE_ERROR_LOG(ret);
 455         error = "orte_grpcomm_base_open";
 456         goto error;
 457     }
 458     if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
 459         ORTE_ERROR_LOG(ret);
 460         error = "orte_grpcomm_base_select";
 461         goto error;
 462     }
 463     /* Open/select the odls */
 464     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) {
 465         ORTE_ERROR_LOG(ret);
 466         error = "orte_odls_base_open";
 467         goto error;
 468     }
 469     if (ORTE_SUCCESS != (ret = orte_odls_base_select())) {
 470         ORTE_ERROR_LOG(ret);
 471         error = "orte_odls_base_select";
 472         goto error;
 473     }
 474     /* Open/select the rtc */
 475     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rtc_base_framework, 0))) {
 476         ORTE_ERROR_LOG(ret);
 477         error = "orte_rtc_base_open";
 478         goto error;
 479     }
 480     if (ORTE_SUCCESS != (ret = orte_rtc_base_select())) {
 481         ORTE_ERROR_LOG(ret);
 482         error = "orte_rtc_base_select";
 483         goto error;
 484     }
 485     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rmaps_base_framework, 0))) {
 486         ORTE_ERROR_LOG(ret);
 487         error = "orte_rmaps_base_open";
 488         goto error;
 489     }
 490     if (ORTE_SUCCESS != (ret = orte_rmaps_base_select())) {
 491         ORTE_ERROR_LOG(ret);
 492         error = "orte_rmaps_base_select";
 493         goto error;
 494     }
 495 
 496     /* if a topology file was given, then the rmaps framework open
 497      * will have reset our topology. Ensure we always get the right
 498      * one by setting our node topology afterwards
 499      */
 500     t = OBJ_NEW(orte_topology_t);
 501     t->topo = opal_hwloc_topology;
 502     /* generate the signature */
 503     orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology);
 504     t->sig = strdup(orte_topo_signature);
 505     opal_pointer_array_add(orte_node_topologies, t);
 506     if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
 507         opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
 508         opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO);
 509     }
 510 
 511     /* Now provide a chance for the PLM
 512      * to perform any module-specific init functions. This
 513      * needs to occur AFTER the communications are setup
 514      * as it may involve starting a non-blocking recv
 515      * Do this only if a specific PLM was given to us - the
 516      * orted has no need of the proxy PLM at all
 517      */
 518     if (plm_in_use) {
 519         if (ORTE_SUCCESS != (ret = orte_plm.init())) {
 520             ORTE_ERROR_LOG(ret);
 521             error = "orte_plm_init";
 522             goto error;
 523         }
 524     }
 525 
 526     /* setup I/O forwarding system - must come after we init routes */
 527     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
 528         ORTE_ERROR_LOG(ret);
 529         error = "orte_iof_base_open";
 530         goto error;
 531     }
 532     if (ORTE_SUCCESS != (ret = orte_iof_base_select())) {
 533         ORTE_ERROR_LOG(ret);
 534         error = "orte_iof_base_select";
 535         goto error;
 536     }
 537     /* setup the FileM */
 538     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) {
 539         ORTE_ERROR_LOG(ret);
 540         error = "orte_filem_base_open";
 541         goto error;
 542     }
 543     if (ORTE_SUCCESS != (ret = orte_filem_base_select())) {
 544         ORTE_ERROR_LOG(ret);
 545         error = "orte_filem_base_select";
 546         goto error;
 547     }
 548 
 549     return ORTE_SUCCESS;
 550 
 551   error:
 552     orte_show_help("help-orte-runtime.txt",
 553                    "orte_init:startup:internal-failure",
 554                    true, error, ORTE_ERROR_NAME(ret), ret);
 555     /* remove our use of the session directory tree */
 556     orte_session_dir_finalize(ORTE_PROC_MY_NAME);
 557     /* ensure we scrub the session directory tree */
 558     orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 559     return ORTE_ERR_SILENT;
 560 }
 561 
 562 int orte_ess_base_orted_finalize(void)
 563 {
 564     orte_ess_base_signal_t *sig;
 565     unsigned int i;
 566 
 567     if (signals_set) {
 568         opal_event_del(&epipe_handler);
 569         opal_event_del(&term_handler);
 570         opal_event_del(&int_handler);
 571         /** Remove the USR signal handlers */
 572         i = 0;
 573         OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) {
 574             opal_event_signal_del(forward_signals_events + i);
 575             ++i;
 576         }
 577         free (forward_signals_events);
 578         forward_signals_events = NULL;
 579         signals_set = false;
 580     }
 581 
 582     /* cleanup */
 583     if (NULL != log_path) {
 584         unlink(log_path);
 585     }
 586     /* shutdown the pmix server */
 587     pmix_server_finalize();
 588     (void) mca_base_framework_close(&opal_pmix_base_framework);
 589 
 590     /* close frameworks */
 591     (void) mca_base_framework_close(&orte_filem_base_framework);
 592     (void) mca_base_framework_close(&orte_grpcomm_base_framework);
 593     (void) mca_base_framework_close(&orte_iof_base_framework);
 594     (void) mca_base_framework_close(&orte_errmgr_base_framework);
 595     (void) mca_base_framework_close(&orte_plm_base_framework);
 596     /* make sure our local procs are dead */
 597     orte_odls.kill_local_procs(NULL);
 598     (void) mca_base_framework_close(&orte_rmaps_base_framework);
 599     (void) mca_base_framework_close(&orte_rtc_base_framework);
 600     (void) mca_base_framework_close(&orte_odls_base_framework);
 601     (void) mca_base_framework_close(&orte_routed_base_framework);
 602     (void) mca_base_framework_close(&orte_rml_base_framework);
 603     (void) mca_base_framework_close(&orte_oob_base_framework);
 604     (void) mca_base_framework_close(&orte_state_base_framework);
 605     /* remove our use of the session directory tree */
 606     orte_session_dir_finalize(ORTE_PROC_MY_NAME);
 607     /* ensure we scrub the session directory tree */
 608     orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 609     /* release the job hash table */
 610     OBJ_RELEASE(orte_job_data);
 611     return ORTE_SUCCESS;
 612 }
 613 
 614 static void shutdown_signal(int fd, short flags, void *arg)
 615 {
 616     /* trigger the call to shutdown callback to protect
 617      * against race conditions - the trigger event will
 618      * check the one-time lock
 619      */
 620     ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
 621     ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FORCED_EXIT);
 622 }
 623 
 624 /**
 625  * Deal with sigpipe errors
 626  */
 627 static void epipe_signal_callback(int fd, short flags, void *arg)
 628 {
 629     /* for now, we just ignore them */
 630     return;
 631 }
 632 
 633 /* Pass user signals to the local application processes */
 634 static void signal_forward_callback(int fd, short event, void *arg)
 635 {
 636     opal_event_t *signal = (opal_event_t*)arg;
 637     int32_t signum, rc;
 638     opal_buffer_t *cmd;
 639     orte_daemon_cmd_flag_t command=ORTE_DAEMON_SIGNAL_LOCAL_PROCS;
 640     orte_jobid_t job = ORTE_JOBID_WILDCARD;
 641 
 642     signum = OPAL_EVENT_SIGNAL(signal);
 643     if (!orte_execute_quiet){
 644         fprintf(stderr, "%s: Forwarding signal %d to job\n",
 645                 orte_basename, signum);
 646     }
 647 
 648     cmd = OBJ_NEW(opal_buffer_t);
 649 
 650     /* pack the command */
 651     if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &command, 1, ORTE_DAEMON_CMD))) {
 652         ORTE_ERROR_LOG(rc);
 653         OBJ_RELEASE(cmd);
 654         return;
 655     }
 656 
 657     /* pack the jobid */
 658     if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &job, 1, ORTE_JOBID))) {
 659         ORTE_ERROR_LOG(rc);
 660         OBJ_RELEASE(cmd);
 661         return;
 662     }
 663 
 664     /* pack the signal */
 665     if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &signum, 1, OPAL_INT32))) {
 666         ORTE_ERROR_LOG(rc);
 667         OBJ_RELEASE(cmd);
 668         return;
 669     }
 670 
 671     /* send it to ourselves */
 672     if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, cmd,
 673                                           ORTE_RML_TAG_DAEMON,
 674                                           NULL, NULL))) {
 675         ORTE_ERROR_LOG(rc);
 676         OBJ_RELEASE(cmd);
 677     }
 678 
 679 }

/* [<][>][^][v][top][bottom][index][help] */