This source file includes the following definitions.
- setup_sighandler
- rte_init
- rte_finalize
- rte_abort
- clean_abort
- abort_signal_callback
- epipe_signal_callback
- signal_forward_callback
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 
  26 
  27 
  28 #include "orte_config.h"
  29 #include "orte/constants.h"
  30 
  31 #include <sys/types.h>
  32 #include <stdio.h>
  33 #ifdef HAVE_FCNTL_H
  34 #include <fcntl.h>
  35 #endif
  36 #ifdef HAVE_UNISTD_H
  37 #include <unistd.h>
  38 #endif
  39 
  40 #include "opal/hash_string.h"
  41 #include "opal/class/opal_hash_table.h"
  42 #include "opal/class/opal_list.h"
  43 #include "opal/mca/event/event.h"
  44 #include "opal/runtime/opal.h"
  45 
  46 #include "opal/util/arch.h"
  47 #include "opal/util/argv.h"
  48 #include "opal/util/if.h"
  49 #include "opal/util/os_path.h"
  50 #include "opal/util/output.h"
  51 #include "opal/util/opal_environ.h"
  52 #include "opal/util/malloc.h"
  53 #include "opal/util/basename.h"
  54 #include "opal/util/fd.h"
  55 #include "opal/mca/pmix/base/base.h"
  56 #include "opal/mca/pstat/base/base.h"
  57 #include "opal/mca/hwloc/base/base.h"
  58 
  59 #include "orte/mca/oob/base/base.h"
  60 #include "orte/mca/rml/base/base.h"
  61 #include "orte/mca/rml/rml_types.h"
  62 #include "orte/mca/routed/base/base.h"
  63 #include "orte/mca/routed/routed.h"
  64 #include "orte/mca/rtc/base/base.h"
  65 #include "orte/mca/errmgr/base/base.h"
  66 #include "orte/mca/grpcomm/base/base.h"
  67 #include "orte/mca/iof/base/base.h"
  68 #include "orte/mca/ras/base/base.h"
  69 #include "orte/mca/plm/base/base.h"
  70 #include "orte/mca/plm/plm.h"
  71 #include "orte/mca/odls/base/base.h"
  72 #include "orte/mca/rmaps/base/base.h"
  73 #include "orte/mca/filem/base/base.h"
  74 #include "orte/mca/state/base/base.h"
  75 #include "orte/mca/state/state.h"
  76 
  77 #include "orte/orted/orted_submit.h"
  78 #include "orte/orted/pmix/pmix_server.h"
  79 
  80 #include "orte/util/show_help.h"
  81 #include "orte/util/proc_info.h"
  82 #include "orte/util/session_dir.h"
  83 #include "orte/util/hnp_contact.h"
  84 #include "orte/util/name_fns.h"
  85 #include "orte/util/show_help.h"
  86 #include "orte/util/comm/comm.h"
  87 
  88 #include "orte/runtime/runtime.h"
  89 #include "orte/runtime/orte_wait.h"
  90 #include "orte/runtime/orte_globals.h"
  91 #include "orte/runtime/orte_quit.h"
  92 #include "orte/runtime/orte_locks.h"
  93 
  94 #include "orte/mca/ess/ess.h"
  95 #include "orte/mca/ess/base/base.h"
  96 #include "orte/mca/ess/hnp/ess_hnp.h"
  97 
  98 static int rte_init(void);
  99 static int rte_finalize(void);
 100 static void rte_abort(int status, bool report) __opal_attribute_noreturn__;
 101 
 102 orte_ess_base_module_t orte_ess_hnp_module = {
 103     rte_init,
 104     rte_finalize,
 105     rte_abort,
 106     NULL 
 107 };
 108 
 109 
 110 static bool signals_set=false;
 111 static bool forcibly_die=false;
 112 static opal_event_t term_handler;
 113 static opal_event_t epipe_handler;
 114 static int term_pipe[2];
 115 static opal_event_t *forward_signals_events = NULL;
 116 
 117 static void abort_signal_callback(int signal);
 118 static void clean_abort(int fd, short flags, void *arg);
 119 static void epipe_signal_callback(int fd, short flags, void *arg);
 120 static void signal_forward_callback(int fd, short event, void *arg);
 121 
 122 static void setup_sighandler(int signal, opal_event_t *ev,
 123                              opal_event_cbfunc_t cbfunc)
 124 {
 125     opal_event_signal_set(orte_event_base, ev, signal, cbfunc, ev);
 126     opal_event_set_priority(ev, ORTE_ERROR_PRI);
 127     opal_event_signal_add(ev, NULL);
 128 }
 129 
 130 static int rte_init(void)
 131 {
 132     int ret;
 133     char *error = NULL;
 134     char *contact_path;
 135     orte_job_t *jdata;
 136     orte_node_t *node;
 137     orte_proc_t *proc;
 138     orte_app_context_t *app;
 139     char **aliases, *aptr;
 140     char *coprocessors, **sns;
 141     uint32_t h;
 142     int idx;
 143     orte_topology_t *t;
 144     orte_ess_base_signal_t *sig;
 145     opal_value_t val;
 146 
 147     
 148     if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
 149         error = "orte_ess_base_std_prolog";
 150         goto error;
 151     }
 152 
 153     
 154     setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback);
 155     
 156 
 157 
 158 
 159 
 160 
 161 
 162 
 163 
 164 
 165 
 166 
 167 
 168 
 169     pipe(term_pipe);
 170     
 171     opal_event_set(orte_event_base, &term_handler, term_pipe[0], OPAL_EV_READ, clean_abort, NULL);
 172     opal_event_set_priority(&term_handler, ORTE_ERROR_PRI);
 173     opal_event_add(&term_handler, NULL);
 174 
 175     
 176 
 177     if (opal_fd_set_cloexec(term_pipe[0]) != OPAL_SUCCESS ||
 178         opal_fd_set_cloexec(term_pipe[1]) != OPAL_SUCCESS) {
 179         error = "unable to set the pipe to CLOEXEC";
 180         goto error;
 181     }
 182 
 183     
 184     signal(SIGTERM, abort_signal_callback);
 185     signal(SIGINT, abort_signal_callback);
 186     signal(SIGHUP, abort_signal_callback);
 187 
 188     
 189     if (0 < (idx = opal_list_get_size(&orte_ess_base_signals))) {
 190         forward_signals_events = (opal_event_t*)malloc(sizeof(opal_event_t) * idx);
 191         if (NULL == forward_signals_events) {
 192             ret = ORTE_ERR_OUT_OF_RESOURCE;
 193             error = "unable to malloc";
 194             goto error;
 195         }
 196         idx = 0;
 197         OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) {
 198             setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback);
 199             ++idx;
 200         }
 201     }
 202     signals_set = true;
 203 
 204     
 205     if (NULL == opal_hwloc_topology) {
 206         if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) {
 207             error = "topology discovery";
 208             goto error;
 209         }
 210     }
 211 
 212     
 213     if (orte_xml_output) {
 214         fprintf(orte_xml_fp, "<mpirun>\n");
 215         fflush(orte_xml_fp);
 216     }
 217 
 218     
 219 
 220 
 221     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_pstat_base_framework, 0))) {
 222         error = "opal_pstat_base_open";
 223         goto error;
 224     }
 225     if (ORTE_SUCCESS != (ret = opal_pstat_base_select())) {
 226         error = "opal_pstat_base_select";
 227         goto error;
 228     }
 229 
 230     
 231     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
 232         error = "orte_state_base_open";
 233         goto error;
 234     }
 235     if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
 236         error = "orte_state_base_select";
 237         goto error;
 238     }
 239 
 240     
 241     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
 242         error = "orte_errmgr_base_open";
 243         goto error;
 244     }
 245 
 246     
 247 
 248 
 249 
 250 
 251     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) {
 252         error = "orte_plm_base_open";
 253         goto error;
 254     }
 255     if (ORTE_SUCCESS != (ret = orte_plm_base_select())) {
 256         error = "orte_plm_base_select";
 257         if (ORTE_ERR_FATAL == ret) {
 258             
 259             ret = ORTE_ERR_SILENT;
 260         }
 261         goto error;
 262     }
 263     
 264     if (NULL != orte_ess_base_jobid) {
 265         if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&ORTE_PROC_MY_NAME->jobid, orte_ess_base_jobid))) {
 266             error = "convert_string_to_jobid";
 267             goto error;
 268         }
 269         ORTE_PROC_MY_NAME->vpid = 0;
 270     } else {
 271         if (ORTE_SUCCESS != (ret = orte_plm.set_hnp_name())) {
 272             error = "orte_plm_set_hnp_name";
 273             goto error;
 274         }
 275     }
 276     
 277     orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
 278     orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
 279     orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
 280     orte_process_info.super.proc_arch = opal_local_arch;
 281     opal_proc_local_set(&orte_process_info.super);
 282 
 283     
 284     if (orte_create_session_dirs) {
 285         OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
 286                              "%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
 287                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 288                              (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
 289                              orte_process_info.nodename));
 290         
 291 
 292 
 293         if (ORTE_SUCCESS != (ret = orte_session_dir(false, ORTE_PROC_MY_NAME))) {
 294             error = "orte_session_dir define";
 295             goto error;
 296         }
 297         
 298 
 299 
 300         orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 301 
 302         
 303         if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
 304             error = "orte_session_dir";
 305             goto error;
 306         }
 307     }
 308 
 309     
 310 
 311     opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
 312     if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
 313         ORTE_ERROR_LOG(ret);
 314         error = "orte_pmix_base_open";
 315         goto error;
 316     }
 317     if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) {
 318         ORTE_ERROR_LOG(ret);
 319         error = "opal_pmix_base_select";
 320         goto error;
 321     }
 322     
 323     opal_pmix_base_set_evbase(orte_event_base);
 324     
 325 
 326 
 327     if (ORTE_SUCCESS != (ret = pmix_server_init())) {
 328         
 329         ret = ORTE_ERR_SILENT;
 330         error = "pmix_server_init";
 331         goto error;
 332     }
 333 
 334     
 335     
 336 
 337 
 338     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
 339         ORTE_ERROR_LOG(ret);
 340         error = "orte_rml_base_open";
 341         goto error;
 342     }
 343     if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
 344         ORTE_ERROR_LOG(ret);
 345         error = "orte_routed_base_select";
 346         goto error;
 347     }
 348     
 349 
 350 
 351     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
 352         error = "orte_oob_base_open";
 353         goto error;
 354     }
 355     if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
 356         error = "orte_oob_base_select";
 357         goto error;
 358     }
 359 
 360     
 361 
 362 
 363     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
 364         error = "orte_rml_base_open";
 365         goto error;
 366     }
 367     if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
 368         error = "orte_rml_base_select";
 369         goto error;
 370     }
 371 
 372     
 373     pmix_server_start();
 374 
 375     
 376 
 377 
 378     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
 379         ORTE_ERROR_LOG(ret);
 380         error = "orte_grpcomm_base_open";
 381         goto error;
 382     }
 383     if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
 384         ORTE_ERROR_LOG(ret);
 385         error = "orte_grpcomm_base_select";
 386         goto error;
 387     }
 388 
 389 
 390     
 391     if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
 392         error = "orte_errmgr_base_select";
 393         goto error;
 394     }
 395     
 396     orte_job_data = OBJ_NEW(opal_hash_table_t);
 397     if (ORTE_SUCCESS != (ret = opal_hash_table_init(orte_job_data, 128))) {
 398         ORTE_ERROR_LOG(ret);
 399         error = "setup job array";
 400         goto error;
 401     }
 402     orte_node_pool = OBJ_NEW(opal_pointer_array_t);
 403     if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool,
 404                                                        ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
 405                                                        ORTE_GLOBAL_ARRAY_MAX_SIZE,
 406                                                        ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
 407         ORTE_ERROR_LOG(ret);
 408         error = "setup node array";
 409         goto error;
 410     }
 411     orte_node_topologies = OBJ_NEW(opal_pointer_array_t);
 412     if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies,
 413                                                        ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
 414                                                        ORTE_GLOBAL_ARRAY_MAX_SIZE,
 415                                                        ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
 416         ORTE_ERROR_LOG(ret);
 417         error = "setup node topologies array";
 418         goto error;
 419     }
 420     
 421     
 422     jdata = OBJ_NEW(orte_job_t);
 423     jdata->jobid = ORTE_PROC_MY_NAME->jobid;
 424     opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
 425     
 426 
 427 
 428 
 429     jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
 430 
 431     
 432     app = OBJ_NEW(orte_app_context_t);
 433     opal_pointer_array_set_item(jdata->apps, 0, app);
 434     jdata->num_apps++;
 435     
 436     node = OBJ_NEW(orte_node_t);
 437     node->name = strdup(orte_process_info.nodename);
 438     node->index = ORTE_PROC_MY_NAME->vpid;
 439     opal_pointer_array_set_item(orte_node_pool, 0, node);
 440 
 441     
 442     proc = OBJ_NEW(orte_proc_t);
 443     proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
 444     proc->name.vpid = ORTE_PROC_MY_NAME->vpid;
 445     proc->pid = orte_process_info.pid;
 446     orte_oob_base_get_addr(&proc->rml_uri);
 447     orte_process_info.my_hnp_uri = strdup(proc->rml_uri);
 448     
 449     OBJ_CONSTRUCT(&val, opal_value_t);
 450     val.key = OPAL_PMIX_PROC_URI;
 451     val.type = OPAL_STRING;
 452     val.data.string = proc->rml_uri;
 453     if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &val))) {
 454         ORTE_ERROR_LOG(ret);
 455         val.key = NULL;
 456         val.data.string = NULL;
 457         OBJ_DESTRUCT(&val);
 458         error = "store uri";
 459         goto error;
 460     }
 461     val.key = NULL;
 462     val.data.string = NULL;
 463     OBJ_DESTRUCT(&val);
 464     
 465     orte_process_info.my_daemon_uri = strdup(proc->rml_uri);
 466     proc->state = ORTE_PROC_STATE_RUNNING;
 467     OBJ_RETAIN(node);  
 468     proc->node = node;
 469     opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc);
 470     
 471 
 472 
 473 
 474 
 475 
 476     OBJ_RETAIN(proc);   
 477     node->daemon = proc;
 478     ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED);
 479     node->state = ORTE_NODE_STATE_UP;
 480     
 481     if (orte_retain_aliases) {
 482         aliases = NULL;
 483         opal_ifgetaliases(&aliases);
 484         if (0 < opal_argv_count(aliases)) {
 485             
 486             opal_argv_append_nosize(&aliases, orte_process_info.nodename);
 487             aptr = opal_argv_join(aliases, ',');
 488             orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING);
 489             free(aptr);
 490         }
 491         opal_argv_free(aliases);
 492     }
 493     
 494     jdata->num_procs = 1;
 495     jdata->state = ORTE_JOB_STATE_RUNNING;
 496     
 497     jdata->num_reported = 1;
 498 
 499     
 500 
 501 
 502 
 503 
 504     if (ORTE_SUCCESS != (ret = orte_plm.init())) {
 505         ORTE_ERROR_LOG(ret);
 506         error = "orte_plm_init";
 507         goto error;
 508     }
 509     
 510 
 511 
 512 
 513 
 514 
 515     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ras_base_framework, 0))) {
 516         ORTE_ERROR_LOG(ret);
 517         error = "orte_ras_base_open";
 518         goto error;
 519     }
 520     if (ORTE_SUCCESS != (ret = orte_ras_base_select())) {
 521         ORTE_ERROR_LOG(ret);
 522         error = "orte_ras_base_find_available";
 523         goto error;
 524     }
 525     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rmaps_base_framework, 0))) {
 526         ORTE_ERROR_LOG(ret);
 527         error = "orte_rmaps_base_open";
 528         goto error;
 529     }
 530     if (ORTE_SUCCESS != (ret = orte_rmaps_base_select())) {
 531         ORTE_ERROR_LOG(ret);
 532         error = "orte_rmaps_base_find_available";
 533         goto error;
 534     }
 535 
 536     
 537 
 538 
 539 
 540     
 541     t = OBJ_NEW(orte_topology_t);
 542     t->topo = opal_hwloc_topology;
 543     
 544     orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology);
 545     t->sig = strdup(orte_topo_signature);
 546     opal_pointer_array_add(orte_node_topologies, t);
 547     node->topology = t;
 548     if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
 549         opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
 550         opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO);
 551     }
 552 
 553 
 554     
 555     if (NULL == orte_coprocessors) {
 556         orte_coprocessors = OBJ_NEW(opal_hash_table_t);
 557         opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs);
 558     }
 559     
 560     coprocessors = opal_hwloc_base_find_coprocessors(opal_hwloc_topology);
 561     if (NULL != coprocessors) {
 562         
 563 
 564 
 565         sns = opal_argv_split(coprocessors, ',');
 566         for (idx=0; NULL != sns[idx]; idx++) {
 567             
 568             OPAL_HASH_STR(sns[idx], h);
 569             
 570             opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&(ORTE_PROC_MY_NAME->vpid));
 571         }
 572         opal_argv_free(sns);
 573         free(coprocessors);
 574         orte_coprocessors_detected = true;
 575     }
 576     
 577     coprocessors = opal_hwloc_base_check_on_coprocessor();
 578     if (NULL != coprocessors) {
 579         
 580         OPAL_HASH_STR(coprocessors, h);
 581         
 582         opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&(ORTE_PROC_MY_NAME->vpid));
 583         orte_set_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, ORTE_ATTR_LOCAL, coprocessors, OPAL_STRING);
 584         free(coprocessors);
 585         orte_coprocessors_detected = true;
 586     }
 587 
 588     
 589     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) {
 590         ORTE_ERROR_LOG(ret);
 591         error = "orte_odls_base_open";
 592         goto error;
 593     }
 594     if (ORTE_SUCCESS != (ret = orte_odls_base_select())) {
 595         ORTE_ERROR_LOG(ret);
 596         error = "orte_odls_base_select";
 597         goto error;
 598     }
 599     
 600     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rtc_base_framework, 0))) {
 601         ORTE_ERROR_LOG(ret);
 602         error = "orte_rtc_base_open";
 603         goto error;
 604     }
 605     if (ORTE_SUCCESS != (ret = orte_rtc_base_select())) {
 606         ORTE_ERROR_LOG(ret);
 607         error = "orte_rtc_base_select";
 608         goto error;
 609     }
 610 
 611     
 612     orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SHOW_HELP,
 613                             ORTE_RML_PERSISTENT, orte_show_help_recv, NULL);
 614 
 615     if (orte_create_session_dirs) {
 616         
 617 
 618         opal_output_set_output_file_info(orte_process_info.proc_session_dir,
 619                                          "output-", NULL, NULL);
 620         
 621         if( NULL == orte_process_info.jobfam_session_dir ){
 622             
 623             ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
 624             goto error;
 625         }
 626         contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir, "contact.txt", NULL);
 627         OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
 628                              "%s writing contact file %s",
 629                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 630                              contact_path));
 631 
 632         if (ORTE_SUCCESS != (ret = orte_write_hnp_contact_file(contact_path))) {
 633             OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
 634                                  "%s writing contact file failed with error %s",
 635                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 636                                  ORTE_ERROR_NAME(ret)));
 637         } else {
 638             OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
 639                                  "%s wrote contact file",
 640                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 641         }
 642         free(contact_path);
 643     }
 644 
 645     
 646     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
 647         ORTE_ERROR_LOG(ret);
 648         error = "orte_iof_base_open";
 649         goto error;
 650     }
 651     if (ORTE_SUCCESS != (ret = orte_iof_base_select())) {
 652         ORTE_ERROR_LOG(ret);
 653         error = "orte_iof_base_select";
 654         goto error;
 655     }
 656     
 657     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) {
 658         ORTE_ERROR_LOG(ret);
 659         error = "orte_filem_base_open";
 660         goto error;
 661     }
 662     if (ORTE_SUCCESS != (ret = orte_filem_base_select())) {
 663         ORTE_ERROR_LOG(ret);
 664         error = "orte_filem_base_select";
 665         goto error;
 666     }
 667 
 668     
 669     orte_state.add_job_state(ORTE_JOB_STATE_READY_FOR_DEBUGGERS,
 670                              orte_debugger_init_after_spawn,
 671                              ORTE_SYS_PRI);
 672     orte_state.add_job_state(ORTE_JOB_STATE_DEBUGGER_DETACH,
 673                              orte_debugger_detached,
 674                              ORTE_SYS_PRI);
 675 
 676     
 677 
 678 
 679     if (orte_report_events) {
 680         if (ORTE_SUCCESS != (ret = orte_util_comm_connect_tool(orte_report_events_uri))) {
 681             error = "could not connect to tool";
 682             goto error;
 683         }
 684     }
 685     
 686 
 687 
 688 
 689 
 690 
 691 
 692 
 693 
 694 
 695 
 696 
 697 
 698 
 699 
 700 
 701 
 702     opal_progress_set_yield_when_idle(false);
 703     return ORTE_SUCCESS;
 704 
 705   error:
 706     if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
 707         orte_show_help("help-orte-runtime.txt",
 708                        "orte_init:startup:internal-failure",
 709                        true, error, ORTE_ERROR_NAME(ret), ret);
 710     }
 711     
 712     if (NULL != orte_process_info.jobfam_session_dir) {
 713         contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir,
 714                                     "contact.txt", NULL);
 715         unlink(contact_path);
 716         free(contact_path);
 717     }
 718     
 719     orte_session_dir_finalize(ORTE_PROC_MY_NAME);
 720     
 721     orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 722     return ORTE_ERR_SILENT;
 723 }
 724 
 725 static int rte_finalize(void)
 726 {
 727     char *contact_path;
 728     orte_job_t *jdata;
 729     uint32_t key;
 730     orte_ess_base_signal_t *sig;
 731     unsigned int i;
 732 
 733     if (signals_set) {
 734         
 735         opal_event_signal_del(&epipe_handler);
 736         
 737         opal_event_del(&term_handler);
 738         
 739         i = 0;
 740         OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) {
 741             opal_event_signal_del(forward_signals_events + i);
 742             ++i;
 743         }
 744         free (forward_signals_events);
 745         forward_signals_events = NULL;
 746         signals_set = false;
 747     }
 748 
 749     
 750     pmix_server_finalize();
 751     (void) mca_base_framework_close(&opal_pmix_base_framework);
 752     (void) mca_base_framework_close(&orte_filem_base_framework);
 753     
 754     fflush(stdout);
 755     fflush(stderr);
 756 
 757     (void) mca_base_framework_close(&orte_iof_base_framework);
 758     (void) mca_base_framework_close(&orte_rtc_base_framework);
 759     (void) mca_base_framework_close(&orte_odls_base_framework);
 760     (void) mca_base_framework_close(&orte_rmaps_base_framework);
 761     (void) mca_base_framework_close(&orte_ras_base_framework);
 762     (void) mca_base_framework_close(&orte_grpcomm_base_framework);
 763     (void) mca_base_framework_close(&orte_routed_base_framework);
 764     (void) mca_base_framework_close(&orte_plm_base_framework);
 765     
 766 
 767     orte_errmgr.finalize();
 768 
 769     
 770     (void) mca_base_framework_close(&opal_pstat_base_framework);
 771 
 772     
 773     if (NULL != orte_process_info.jobfam_session_dir) {
 774         contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir,
 775                                     "contact.txt", NULL);
 776         unlink(contact_path);
 777         free(contact_path);
 778     }
 779 
 780     
 781     (void) mca_base_framework_close(&orte_rml_base_framework);
 782     (void) mca_base_framework_close(&orte_oob_base_framework);
 783     (void) mca_base_framework_close(&orte_errmgr_base_framework);
 784     (void) mca_base_framework_close(&orte_state_base_framework);
 785 
 786     
 787     orte_session_dir_finalize(ORTE_PROC_MY_NAME);
 788     
 789     orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 790 
 791     
 792     if (orte_xml_output) {
 793         fprintf(orte_xml_fp, "</mpirun>\n");
 794         fflush(orte_xml_fp);
 795         if (stdout != orte_xml_fp) {
 796             fclose(orte_xml_fp);
 797         }
 798     }
 799 
 800     
 801     OPAL_HASH_TABLE_FOREACH(key, uint32, jdata, orte_job_data) {
 802         if (NULL != jdata) {
 803             OBJ_RELEASE(jdata);
 804         }
 805     }
 806     OBJ_RELEASE(orte_job_data);
 807 
 808     if (NULL != orte_process_info.super.proc_hostname) {
 809         free(orte_process_info.super.proc_hostname);
 810     }
 811     if (orte_do_not_launch) {
 812         exit(0);
 813     }
 814 
 815 {
 816     opal_pointer_array_t * array = orte_node_topologies;
 817     int i;
 818     if( array->number_free != array->size ) {
 819         OPAL_THREAD_LOCK(&array->lock);
 820         array->lowest_free = 0;
 821         array->number_free = array->size;
 822         for(i=0; i<array->size; i++) {
 823             if(NULL != array->addr[i]) {
 824                 orte_topology_t * topo = (orte_topology_t *)array->addr[i];
 825                 topo->topo = NULL;
 826                 OBJ_RELEASE(topo);
 827             }
 828             array->addr[i] = NULL;
 829         }
 830         OPAL_THREAD_UNLOCK(&array->lock);
 831     }
 832 }
 833     OBJ_RELEASE(orte_node_topologies);
 834 
 835 {
 836     opal_pointer_array_t * array = orte_node_pool;
 837     int i;
 838     orte_node_t* node = (orte_node_t *)opal_pointer_array_get_item(orte_node_pool, 0);
 839     assert(NULL != node);
 840     OBJ_RELEASE(node->daemon);
 841     node->daemon = NULL;
 842     if( array->number_free != array->size ) {
 843         OPAL_THREAD_LOCK(&array->lock);
 844         array->lowest_free = 0;
 845         array->number_free = array->size;
 846         for(i=0; i<array->size; i++) {
 847             if(NULL != array->addr[i]) {
 848                 node= (orte_node_t*)array->addr[i];
 849                 OBJ_RELEASE(node);
 850             }
 851             array->addr[i] = NULL;
 852         }
 853         OPAL_THREAD_UNLOCK(&array->lock);
 854     }
 855 }
 856     OBJ_RELEASE(orte_node_pool);
 857 
 858     free(orte_topo_signature);
 859 
 860     return ORTE_SUCCESS;
 861 }
 862 
 863 static void rte_abort(int status, bool report)
 864 {
 865     
 866 
 867 
 868 
 869 
 870 
 871 
 872 
 873 
 874     
 875     orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 876     
 877 
 878 
 879     orte_proc_info_finalize();
 880     
 881     exit(status);
 882 }
 883 
 884 static void clean_abort(int fd, short flags, void *arg)
 885 {
 886     
 887 
 888 
 889     if (opal_atomic_trylock(&orte_abort_inprogress_lock)) { 
 890         if (forcibly_die) {
 891             
 892             orte_odls.kill_local_procs(NULL);
 893             
 894             orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 895             
 896             opal_pmix.finalize();
 897             
 898             exit(ORTE_ERROR_DEFAULT_EXIT_CODE);
 899         }
 900         fprintf(stderr, "%s: abort is already in progress...hit ctrl-c again to forcibly terminate\n\n", orte_basename);
 901         forcibly_die = true;
 902         
 903         opal_event_add(&term_handler, NULL);
 904         return;
 905     }
 906     
 907     ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
 908 
 909     
 910     orte_job_term_ordered = true;
 911     
 912 
 913 
 914     orte_execute_quiet = true;
 915     
 916 
 917 
 918 
 919 
 920     orte_plm.terminate_orteds();;
 921 }
 922 
 923 static struct timeval current, last={0,0};
 924 static bool first = true;
 925 
 926 
 927 
 928 
 929 
 930 static void abort_signal_callback(int fd)
 931 {
 932     uint8_t foo = 1;
 933     char *msg = "Abort is in progress...hit ctrl-c again within 5 seconds to forcibly terminate\n\n";
 934 
 935     
 936 
 937 
 938     if (first) {
 939         first = false;
 940         gettimeofday(¤t, NULL);
 941     } else {
 942         
 943         gettimeofday(¤t, NULL);
 944         
 945 
 946 
 947 
 948         if ((current.tv_sec - last.tv_sec) < 5) {
 949             exit(1);
 950         }
 951         write(1, (void*)msg, strlen(msg));
 952     }
 953     
 954     last.tv_sec = current.tv_sec;
 955     
 956     write(term_pipe[1], &foo, 1);
 957 }
 958 
 959 
 960 
 961 
 962 static int sigpipe_error_count=0;
 963 static void epipe_signal_callback(int fd, short flags, void *arg)
 964 {
 965     sigpipe_error_count++;
 966 
 967     if (10 < sigpipe_error_count) {
 968         
 969         opal_output(0, "%s: SIGPIPE detected on fd %d - aborting", orte_basename, fd);
 970         clean_abort(0, 0, NULL);
 971     }
 972 
 973     return;
 974 }
 975 
 976 
 977 
 978 
 979 static void  signal_forward_callback(int fd, short event, void *arg)
 980 {
 981     opal_event_t *signal = (opal_event_t*)arg;
 982     int signum, ret;
 983 
 984     signum = OPAL_EVENT_SIGNAL(signal);
 985     if (!orte_execute_quiet){
 986         fprintf(stderr, "%s: Forwarding signal %d to job\n",
 987                 orte_basename, signum);
 988     }
 989 
 990     
 991     if (ORTE_SUCCESS != (ret = orte_plm.signal_job(ORTE_JOBID_WILDCARD, signum))) {
 992         fprintf(stderr, "Signal %d could not be sent to the job (returned %d)",
 993                 signum, ret);
 994     }
 995 }