root/orte/mca/ess/hnp/ess_hnp_module.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. setup_sighandler
  2. rte_init
  3. rte_finalize
  4. rte_abort
  5. clean_abort
  6. abort_signal_callback
  7. epipe_signal_callback
  8. signal_forward_callback

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2018 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2010-2011 Oak Ridge National Labs.  All rights reserved.
  14  * Copyright (c) 2011-2014 Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2011-2017 Los Alamos National Security, LLC.  All rights
  16  *                         reserved.
  17  * Copyright (c) 2013-2019 Intel, Inc.  All rights reserved.
  18  * Copyright (c) 2017-2018 Research Organization for Information Science
  19  *                         and Technology (RIST). All rights reserved.
  20  * $COPYRIGHT$
  21  *
  22  * Additional copyrights may follow
  23  *
  24  * $HEADER$
  25  *
  26  */
  27 
  28 #include "orte_config.h"
  29 #include "orte/constants.h"
  30 
  31 #include <sys/types.h>
  32 #include <stdio.h>
  33 #ifdef HAVE_FCNTL_H
  34 #include <fcntl.h>
  35 #endif
  36 #ifdef HAVE_UNISTD_H
  37 #include <unistd.h>
  38 #endif
  39 
  40 #include "opal/hash_string.h"
  41 #include "opal/class/opal_hash_table.h"
  42 #include "opal/class/opal_list.h"
  43 #include "opal/mca/event/event.h"
  44 #include "opal/runtime/opal.h"
  45 
  46 #include "opal/util/arch.h"
  47 #include "opal/util/argv.h"
  48 #include "opal/util/if.h"
  49 #include "opal/util/os_path.h"
  50 #include "opal/util/output.h"
  51 #include "opal/util/opal_environ.h"
  52 #include "opal/util/malloc.h"
  53 #include "opal/util/basename.h"
  54 #include "opal/util/fd.h"
  55 #include "opal/mca/pmix/base/base.h"
  56 #include "opal/mca/pstat/base/base.h"
  57 #include "opal/mca/hwloc/base/base.h"
  58 
  59 #include "orte/mca/oob/base/base.h"
  60 #include "orte/mca/rml/base/base.h"
  61 #include "orte/mca/rml/rml_types.h"
  62 #include "orte/mca/routed/base/base.h"
  63 #include "orte/mca/routed/routed.h"
  64 #include "orte/mca/rtc/base/base.h"
  65 #include "orte/mca/errmgr/base/base.h"
  66 #include "orte/mca/grpcomm/base/base.h"
  67 #include "orte/mca/iof/base/base.h"
  68 #include "orte/mca/ras/base/base.h"
  69 #include "orte/mca/plm/base/base.h"
  70 #include "orte/mca/plm/plm.h"
  71 #include "orte/mca/odls/base/base.h"
  72 #include "orte/mca/rmaps/base/base.h"
  73 #include "orte/mca/filem/base/base.h"
  74 #include "orte/mca/state/base/base.h"
  75 #include "orte/mca/state/state.h"
  76 
  77 #include "orte/orted/orted_submit.h"
  78 #include "orte/orted/pmix/pmix_server.h"
  79 
  80 #include "orte/util/show_help.h"
  81 #include "orte/util/proc_info.h"
  82 #include "orte/util/session_dir.h"
  83 #include "orte/util/hnp_contact.h"
  84 #include "orte/util/name_fns.h"
  85 #include "orte/util/show_help.h"
  86 #include "orte/util/comm/comm.h"
  87 
  88 #include "orte/runtime/runtime.h"
  89 #include "orte/runtime/orte_wait.h"
  90 #include "orte/runtime/orte_globals.h"
  91 #include "orte/runtime/orte_quit.h"
  92 #include "orte/runtime/orte_locks.h"
  93 
  94 #include "orte/mca/ess/ess.h"
  95 #include "orte/mca/ess/base/base.h"
  96 #include "orte/mca/ess/hnp/ess_hnp.h"
  97 
  98 static int rte_init(void);
  99 static int rte_finalize(void);
 100 static void rte_abort(int status, bool report) __opal_attribute_noreturn__;
 101 
 102 orte_ess_base_module_t orte_ess_hnp_module = {
 103     rte_init,
 104     rte_finalize,
 105     rte_abort,
 106     NULL /* ft_event */
 107 };
 108 
 109 /* local globals */
 110 static bool signals_set=false;
 111 static bool forcibly_die=false;
 112 static opal_event_t term_handler;
 113 static opal_event_t epipe_handler;
 114 static int term_pipe[2];
 115 static opal_event_t *forward_signals_events = NULL;
 116 
 117 static void abort_signal_callback(int signal);
 118 static void clean_abort(int fd, short flags, void *arg);
 119 static void epipe_signal_callback(int fd, short flags, void *arg);
 120 static void signal_forward_callback(int fd, short event, void *arg);
 121 
 122 static void setup_sighandler(int signal, opal_event_t *ev,
 123                              opal_event_cbfunc_t cbfunc)
 124 {
 125     opal_event_signal_set(orte_event_base, ev, signal, cbfunc, ev);
 126     opal_event_set_priority(ev, ORTE_ERROR_PRI);
 127     opal_event_signal_add(ev, NULL);
 128 }
 129 
 130 static int rte_init(void)
 131 {
 132     int ret;
 133     char *error = NULL;
 134     char *contact_path;
 135     orte_job_t *jdata;
 136     orte_node_t *node;
 137     orte_proc_t *proc;
 138     orte_app_context_t *app;
 139     char **aliases, *aptr;
 140     char *coprocessors, **sns;
 141     uint32_t h;
 142     int idx;
 143     orte_topology_t *t;
 144     orte_ess_base_signal_t *sig;
 145     opal_value_t val;
 146 
 147     /* run the prolog */
 148     if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
 149         error = "orte_ess_base_std_prolog";
 150         goto error;
 151     }
 152 
 153     /* setup callback for SIGPIPE */
 154     setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback);
 155     /** setup callbacks for abort signals - from this point
 156      * forward, we need to abort in a manner that allows us
 157      * to cleanup. However, we cannot directly use libevent
 158      * to trap these signals as otherwise we cannot respond
 159      * to them if we are stuck in an event! So instead use
 160      * the basic POSIX trap functions to handle the signal,
 161      * and then let that signal handler do some magic to
 162      * avoid the hang
 163      *
 164      * NOTE: posix traps don't allow us to do anything major
 165      * in them, so use a pipe tied to a libevent event to
 166      * reach a "safe" place where the termination event can
 167      * be created
 168      */
 169     pipe(term_pipe);
 170     /* setup an event to attempt normal termination on signal */
 171     opal_event_set(orte_event_base, &term_handler, term_pipe[0], OPAL_EV_READ, clean_abort, NULL);
 172     opal_event_set_priority(&term_handler, ORTE_ERROR_PRI);
 173     opal_event_add(&term_handler, NULL);
 174 
 175     /* Set both ends of this pipe to be close-on-exec so that no
 176        children inherit it */
 177     if (opal_fd_set_cloexec(term_pipe[0]) != OPAL_SUCCESS ||
 178         opal_fd_set_cloexec(term_pipe[1]) != OPAL_SUCCESS) {
 179         error = "unable to set the pipe to CLOEXEC";
 180         goto error;
 181     }
 182 
 183     /* point the signal trap to a function that will activate that event */
 184     signal(SIGTERM, abort_signal_callback);
 185     signal(SIGINT, abort_signal_callback);
 186     signal(SIGHUP, abort_signal_callback);
 187 
 188     /** setup callbacks for signals we should forward */
 189     if (0 < (idx = opal_list_get_size(&orte_ess_base_signals))) {
 190         forward_signals_events = (opal_event_t*)malloc(sizeof(opal_event_t) * idx);
 191         if (NULL == forward_signals_events) {
 192             ret = ORTE_ERR_OUT_OF_RESOURCE;
 193             error = "unable to malloc";
 194             goto error;
 195         }
 196         idx = 0;
 197         OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) {
 198             setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback);
 199             ++idx;
 200         }
 201     }
 202     signals_set = true;
 203 
 204     /* get the local topology */
 205     if (NULL == opal_hwloc_topology) {
 206         if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) {
 207             error = "topology discovery";
 208             goto error;
 209         }
 210     }
 211 
 212     /* if we are using xml for output, put an mpirun start tag */
 213     if (orte_xml_output) {
 214         fprintf(orte_xml_fp, "<mpirun>\n");
 215         fflush(orte_xml_fp);
 216     }
 217 
 218     /* open and setup the opal_pstat framework so we can provide
 219      * process stats if requested
 220      */
 221     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&opal_pstat_base_framework, 0))) {
 222         error = "opal_pstat_base_open";
 223         goto error;
 224     }
 225     if (ORTE_SUCCESS != (ret = opal_pstat_base_select())) {
 226         error = "opal_pstat_base_select";
 227         goto error;
 228     }
 229 
 230     /* open and setup the state machine */
 231     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
 232         error = "orte_state_base_open";
 233         goto error;
 234     }
 235     if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
 236         error = "orte_state_base_select";
 237         goto error;
 238     }
 239 
 240     /* open the errmgr */
 241     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
 242         error = "orte_errmgr_base_open";
 243         goto error;
 244     }
 245 
 246     /* Since we are the HNP, then responsibility for
 247      * defining the name falls to the PLM component for our
 248      * respective environment - hence, we have to open the PLM
 249      * first and select that component.
 250      */
 251     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) {
 252         error = "orte_plm_base_open";
 253         goto error;
 254     }
 255     if (ORTE_SUCCESS != (ret = orte_plm_base_select())) {
 256         error = "orte_plm_base_select";
 257         if (ORTE_ERR_FATAL == ret) {
 258             /* we already output a show_help - so keep down the verbage */
 259             ret = ORTE_ERR_SILENT;
 260         }
 261         goto error;
 262     }
 263     /* if we were spawned by a singleton, our jobid was given to us */
 264     if (NULL != orte_ess_base_jobid) {
 265         if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&ORTE_PROC_MY_NAME->jobid, orte_ess_base_jobid))) {
 266             error = "convert_string_to_jobid";
 267             goto error;
 268         }
 269         ORTE_PROC_MY_NAME->vpid = 0;
 270     } else {
 271         if (ORTE_SUCCESS != (ret = orte_plm.set_hnp_name())) {
 272             error = "orte_plm_set_hnp_name";
 273             goto error;
 274         }
 275     }
 276     /* now that my name is set, xfer it to the OPAL layer */
 277     orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
 278     orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
 279     orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
 280     orte_process_info.super.proc_arch = opal_local_arch;
 281     opal_proc_local_set(&orte_process_info.super);
 282 
 283     /* setup my session directory here as the OOB may need it */
 284     if (orte_create_session_dirs) {
 285         OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
 286                              "%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
 287                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 288                              (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
 289                              orte_process_info.nodename));
 290         /* take a pass thru the session directory code to fillin the
 291          * tmpdir names - don't create anything yet
 292          */
 293         if (ORTE_SUCCESS != (ret = orte_session_dir(false, ORTE_PROC_MY_NAME))) {
 294             error = "orte_session_dir define";
 295             goto error;
 296         }
 297         /* clear the session directory just in case there are
 298          * stale directories laying around
 299          */
 300         orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 301 
 302         /* now actually create the directory tree */
 303         if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
 304             error = "orte_session_dir";
 305             goto error;
 306         }
 307     }
 308 
 309     /* setup the PMIx framework - ensure it skips all non-PMIx components, but
 310      * do not override anything we were given */
 311     opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
 312     if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
 313         ORTE_ERROR_LOG(ret);
 314         error = "orte_pmix_base_open";
 315         goto error;
 316     }
 317     if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) {
 318         ORTE_ERROR_LOG(ret);
 319         error = "opal_pmix_base_select";
 320         goto error;
 321     }
 322     /* set the event base */
 323     opal_pmix_base_set_evbase(orte_event_base);
 324     /* setup the PMIx server - we need this here in case the
 325      * communications infrastructure wants to register
 326      * information */
 327     if (ORTE_SUCCESS != (ret = pmix_server_init())) {
 328         /* the server code already barked, so let's be quiet */
 329         ret = ORTE_ERR_SILENT;
 330         error = "pmix_server_init";
 331         goto error;
 332     }
 333 
 334     /* Setup the communication infrastructure */
 335     /*
 336      * Routed system
 337      */
 338     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
 339         ORTE_ERROR_LOG(ret);
 340         error = "orte_rml_base_open";
 341         goto error;
 342     }
 343     if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
 344         ORTE_ERROR_LOG(ret);
 345         error = "orte_routed_base_select";
 346         goto error;
 347     }
 348     /*
 349      * OOB Layer
 350      */
 351     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
 352         error = "orte_oob_base_open";
 353         goto error;
 354     }
 355     if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
 356         error = "orte_oob_base_select";
 357         goto error;
 358     }
 359 
 360     /*
 361      * Runtime Messaging Layer
 362      */
 363     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
 364         error = "orte_rml_base_open";
 365         goto error;
 366     }
 367     if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
 368         error = "orte_rml_base_select";
 369         goto error;
 370     }
 371 
 372     /* it is now safe to start the pmix server */
 373     pmix_server_start();
 374 
 375     /*
 376      * Group communications
 377      */
 378     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
 379         ORTE_ERROR_LOG(ret);
 380         error = "orte_grpcomm_base_open";
 381         goto error;
 382     }
 383     if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
 384         ORTE_ERROR_LOG(ret);
 385         error = "orte_grpcomm_base_select";
 386         goto error;
 387     }
 388 
 389 
 390     /* setup the error manager */
 391     if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
 392         error = "orte_errmgr_base_select";
 393         goto error;
 394     }
 395     /* setup the global job and node arrays */
 396     orte_job_data = OBJ_NEW(opal_hash_table_t);
 397     if (ORTE_SUCCESS != (ret = opal_hash_table_init(orte_job_data, 128))) {
 398         ORTE_ERROR_LOG(ret);
 399         error = "setup job array";
 400         goto error;
 401     }
 402     orte_node_pool = OBJ_NEW(opal_pointer_array_t);
 403     if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool,
 404                                                        ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
 405                                                        ORTE_GLOBAL_ARRAY_MAX_SIZE,
 406                                                        ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
 407         ORTE_ERROR_LOG(ret);
 408         error = "setup node array";
 409         goto error;
 410     }
 411     orte_node_topologies = OBJ_NEW(opal_pointer_array_t);
 412     if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies,
 413                                                        ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
 414                                                        ORTE_GLOBAL_ARRAY_MAX_SIZE,
 415                                                        ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
 416         ORTE_ERROR_LOG(ret);
 417         error = "setup node topologies array";
 418         goto error;
 419     }
 420     /* Setup the job data object for the daemons */
 421     /* create and store the job data object */
 422     jdata = OBJ_NEW(orte_job_t);
 423     jdata->jobid = ORTE_PROC_MY_NAME->jobid;
 424     opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
 425     /* mark that the daemons have reported as we are the
 426      * only ones in the system right now, and we definitely
 427      * are running!
 428      */
 429     jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
 430 
 431     /* every job requires at least one app */
 432     app = OBJ_NEW(orte_app_context_t);
 433     opal_pointer_array_set_item(jdata->apps, 0, app);
 434     jdata->num_apps++;
 435     /* create and store a node object where we are */
 436     node = OBJ_NEW(orte_node_t);
 437     node->name = strdup(orte_process_info.nodename);
 438     node->index = ORTE_PROC_MY_NAME->vpid;
 439     opal_pointer_array_set_item(orte_node_pool, 0, node);
 440 
 441     /* create and store a proc object for us */
 442     proc = OBJ_NEW(orte_proc_t);
 443     proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
 444     proc->name.vpid = ORTE_PROC_MY_NAME->vpid;
 445     proc->pid = orte_process_info.pid;
 446     orte_oob_base_get_addr(&proc->rml_uri);
 447     orte_process_info.my_hnp_uri = strdup(proc->rml_uri);
 448     /* store it in the local PMIx repo for later retrieval */
 449     OBJ_CONSTRUCT(&val, opal_value_t);
 450     val.key = OPAL_PMIX_PROC_URI;
 451     val.type = OPAL_STRING;
 452     val.data.string = proc->rml_uri;
 453     if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &val))) {
 454         ORTE_ERROR_LOG(ret);
 455         val.key = NULL;
 456         val.data.string = NULL;
 457         OBJ_DESTRUCT(&val);
 458         error = "store uri";
 459         goto error;
 460     }
 461     val.key = NULL;
 462     val.data.string = NULL;
 463     OBJ_DESTRUCT(&val);
 464     /* we are also officially a daemon, so better update that field too */
 465     orte_process_info.my_daemon_uri = strdup(proc->rml_uri);
 466     proc->state = ORTE_PROC_STATE_RUNNING;
 467     OBJ_RETAIN(node);  /* keep accounting straight */
 468     proc->node = node;
 469     opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc);
 470     /* record that the daemon (i.e., us) is on this node
 471      * NOTE: we do not add the proc object to the node's
 472      * proc array because we are not an application proc.
 473      * Instead, we record it in the daemon field of the
 474      * node object
 475      */
 476     OBJ_RETAIN(proc);   /* keep accounting straight */
 477     node->daemon = proc;
 478     ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED);
 479     node->state = ORTE_NODE_STATE_UP;
 480     /* if we are to retain aliases, get ours */
 481     if (orte_retain_aliases) {
 482         aliases = NULL;
 483         opal_ifgetaliases(&aliases);
 484         if (0 < opal_argv_count(aliases)) {
 485             /* add our own local name to it */
 486             opal_argv_append_nosize(&aliases, orte_process_info.nodename);
 487             aptr = opal_argv_join(aliases, ',');
 488             orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING);
 489             free(aptr);
 490         }
 491         opal_argv_free(aliases);
 492     }
 493     /* record that the daemon job is running */
 494     jdata->num_procs = 1;
 495     jdata->state = ORTE_JOB_STATE_RUNNING;
 496     /* obviously, we have "reported" */
 497     jdata->num_reported = 1;
 498 
 499     /* Now provide a chance for the PLM
 500      * to perform any module-specific init functions. This
 501      * needs to occur AFTER the communications are setup
 502      * as it may involve starting a non-blocking recv
 503      */
 504     if (ORTE_SUCCESS != (ret = orte_plm.init())) {
 505         ORTE_ERROR_LOG(ret);
 506         error = "orte_plm_init";
 507         goto error;
 508     }
 509     /*
 510      * Setup the remaining resource
 511      * management and errmgr frameworks - application procs
 512      * and daemons do not open these frameworks as they only use
 513      * the hnp proxy support in the PLM framework.
 514      */
 515     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ras_base_framework, 0))) {
 516         ORTE_ERROR_LOG(ret);
 517         error = "orte_ras_base_open";
 518         goto error;
 519     }
 520     if (ORTE_SUCCESS != (ret = orte_ras_base_select())) {
 521         ORTE_ERROR_LOG(ret);
 522         error = "orte_ras_base_find_available";
 523         goto error;
 524     }
 525     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rmaps_base_framework, 0))) {
 526         ORTE_ERROR_LOG(ret);
 527         error = "orte_rmaps_base_open";
 528         goto error;
 529     }
 530     if (ORTE_SUCCESS != (ret = orte_rmaps_base_select())) {
 531         ORTE_ERROR_LOG(ret);
 532         error = "orte_rmaps_base_find_available";
 533         goto error;
 534     }
 535 
 536     /* if a topology file was given, then the rmaps framework open
 537      * will have reset our topology. Ensure we always get the right
 538      * one by setting our node topology afterwards
 539      */
 540     /* add it to the array of known topologies */
 541     t = OBJ_NEW(orte_topology_t);
 542     t->topo = opal_hwloc_topology;
 543     /* generate the signature */
 544     orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology);
 545     t->sig = strdup(orte_topo_signature);
 546     opal_pointer_array_add(orte_node_topologies, t);
 547     node->topology = t;
 548     if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
 549         opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
 550         opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO);
 551     }
 552 
 553 
 554     /* init the hash table, if necessary */
 555     if (NULL == orte_coprocessors) {
 556         orte_coprocessors = OBJ_NEW(opal_hash_table_t);
 557         opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs);
 558     }
 559     /* detect and add any coprocessors */
 560     coprocessors = opal_hwloc_base_find_coprocessors(opal_hwloc_topology);
 561     if (NULL != coprocessors) {
 562         /* separate the serial numbers of the coprocessors
 563          * on this host
 564          */
 565         sns = opal_argv_split(coprocessors, ',');
 566         for (idx=0; NULL != sns[idx]; idx++) {
 567             /* compute the hash */
 568             OPAL_HASH_STR(sns[idx], h);
 569             /* mark that this coprocessor is hosted by this node */
 570             opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&(ORTE_PROC_MY_NAME->vpid));
 571         }
 572         opal_argv_free(sns);
 573         free(coprocessors);
 574         orte_coprocessors_detected = true;
 575     }
 576     /* see if I am on a coprocessor */
 577     coprocessors = opal_hwloc_base_check_on_coprocessor();
 578     if (NULL != coprocessors) {
 579         /* compute the hash */
 580         OPAL_HASH_STR(coprocessors, h);
 581         /* mark that I am on this coprocessor */
 582         opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&(ORTE_PROC_MY_NAME->vpid));
 583         orte_set_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, ORTE_ATTR_LOCAL, coprocessors, OPAL_STRING);
 584         free(coprocessors);
 585         orte_coprocessors_detected = true;
 586     }
 587 
 588     /* Open/select the odls */
 589     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) {
 590         ORTE_ERROR_LOG(ret);
 591         error = "orte_odls_base_open";
 592         goto error;
 593     }
 594     if (ORTE_SUCCESS != (ret = orte_odls_base_select())) {
 595         ORTE_ERROR_LOG(ret);
 596         error = "orte_odls_base_select";
 597         goto error;
 598     }
 599     /* Open/select the rtc */
 600     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rtc_base_framework, 0))) {
 601         ORTE_ERROR_LOG(ret);
 602         error = "orte_rtc_base_open";
 603         goto error;
 604     }
 605     if (ORTE_SUCCESS != (ret = orte_rtc_base_select())) {
 606         ORTE_ERROR_LOG(ret);
 607         error = "orte_rtc_base_select";
 608         goto error;
 609     }
 610 
 611     /* setup the orte_show_help system to recv remote output */
 612     orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SHOW_HELP,
 613                             ORTE_RML_PERSISTENT, orte_show_help_recv, NULL);
 614 
 615     if (orte_create_session_dirs) {
 616         /* set the opal_output hnp file location to be in the
 617          * proc-specific session directory. */
 618         opal_output_set_output_file_info(orte_process_info.proc_session_dir,
 619                                          "output-", NULL, NULL);
 620         /* save my contact info in a file for others to find */
 621         if( NULL == orte_process_info.jobfam_session_dir ){
 622             /* has to be set here! */
 623             ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
 624             goto error;
 625         }
 626         contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir, "contact.txt", NULL);
 627         OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
 628                              "%s writing contact file %s",
 629                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 630                              contact_path));
 631 
 632         if (ORTE_SUCCESS != (ret = orte_write_hnp_contact_file(contact_path))) {
 633             OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
 634                                  "%s writing contact file failed with error %s",
 635                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 636                                  ORTE_ERROR_NAME(ret)));
 637         } else {
 638             OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
 639                                  "%s wrote contact file",
 640                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 641         }
 642         free(contact_path);
 643     }
 644 
 645     /* setup I/O forwarding system - must come after we init routes */
 646     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
 647         ORTE_ERROR_LOG(ret);
 648         error = "orte_iof_base_open";
 649         goto error;
 650     }
 651     if (ORTE_SUCCESS != (ret = orte_iof_base_select())) {
 652         ORTE_ERROR_LOG(ret);
 653         error = "orte_iof_base_select";
 654         goto error;
 655     }
 656     /* setup the FileM */
 657     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) {
 658         ORTE_ERROR_LOG(ret);
 659         error = "orte_filem_base_open";
 660         goto error;
 661     }
 662     if (ORTE_SUCCESS != (ret = orte_filem_base_select())) {
 663         ORTE_ERROR_LOG(ret);
 664         error = "orte_filem_base_select";
 665         goto error;
 666     }
 667 
 668     /* setup to support debugging */
 669     orte_state.add_job_state(ORTE_JOB_STATE_READY_FOR_DEBUGGERS,
 670                              orte_debugger_init_after_spawn,
 671                              ORTE_SYS_PRI);
 672     orte_state.add_job_state(ORTE_JOB_STATE_DEBUGGER_DETACH,
 673                              orte_debugger_detached,
 674                              ORTE_SYS_PRI);
 675 
 676     /* if a tool has launched us and is requesting event reports,
 677      * then set its contact info into the comm system
 678      */
 679     if (orte_report_events) {
 680         if (ORTE_SUCCESS != (ret = orte_util_comm_connect_tool(orte_report_events_uri))) {
 681             error = "could not connect to tool";
 682             goto error;
 683         }
 684     }
 685     /* We actually do *not* want an HNP to voluntarily yield() the
 686        processor more than necessary.  Orterun already blocks when
 687        it is doing nothing, so it doesn't use any more CPU cycles than
 688        it should; but when it *is* doing something, we do not want it
 689        to be unnecessarily delayed because it voluntarily yielded the
 690        processor in the middle of its work.
 691        For example: when a message arrives at orterun, we want the
 692        OS to wake us up in a timely fashion (which most OS's
 693        seem good about doing) and then we want orterun to process
 694        the message as fast as possible.  If orterun yields and lets
 695        aggressive MPI applications get the processor back, it may be a
 696        long time before the OS schedules orterun to run again
 697        (particularly if there is no IO event to wake it up).  Hence,
 698        routed OOB messages (for example) may be significantly delayed
 699        before being delivered to MPI processes, which can be
 700        problematic in some scenarios (e.g., COMM_SPAWN, BTL's that
 701        require OOB messages for wireup, etc.). */
 702     opal_progress_set_yield_when_idle(false);
 703     return ORTE_SUCCESS;
 704 
 705   error:
 706     if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
 707         orte_show_help("help-orte-runtime.txt",
 708                        "orte_init:startup:internal-failure",
 709                        true, error, ORTE_ERROR_NAME(ret), ret);
 710     }
 711     /* remove my contact info file, if we have session directories */
 712     if (NULL != orte_process_info.jobfam_session_dir) {
 713         contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir,
 714                                     "contact.txt", NULL);
 715         unlink(contact_path);
 716         free(contact_path);
 717     }
 718     /* remove our use of the session directory tree */
 719     orte_session_dir_finalize(ORTE_PROC_MY_NAME);
 720     /* ensure we scrub the session directory tree */
 721     orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 722     return ORTE_ERR_SILENT;
 723 }
 724 
/*
 * Tear down the HNP's runtime environment in the reverse order of setup.
 * The framework-close sequence is order-sensitive: the errmgr and the
 * messaging frameworks (rml/oob) must outlive the frameworks that may
 * still emit messages during their shutdown.  Returns ORTE_SUCCESS.
 */
static int rte_finalize(void)
{
    char *contact_path;
    orte_job_t *jdata;
    uint32_t key;
    orte_ess_base_signal_t *sig;
    unsigned int i;

    /* unregister every signal handler we installed during rte_init */
    if (signals_set) {
        /* Remove the epipe handler */
        opal_event_signal_del(&epipe_handler);
        /* remove the term handler */
        opal_event_del(&term_handler);
        /** Remove the USR signal handlers */
        i = 0;
        /* sig is only the list iterator; the events themselves live in
         * the parallel forward_signals_events array, indexed by i */
        OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) {
            opal_event_signal_del(forward_signals_events + i);
            ++i;
        }
        free (forward_signals_events);
        forward_signals_events = NULL;
        signals_set = false;
    }

    /* shutdown the pmix server */
    pmix_server_finalize();
    (void) mca_base_framework_close(&opal_pmix_base_framework);
    (void) mca_base_framework_close(&orte_filem_base_framework);
    /* output any lingering stdout/err data */
    fflush(stdout);
    fflush(stderr);

    /* close the launch/mapping frameworks; messaging (rml/oob) stays up
     * until after these are done */
    (void) mca_base_framework_close(&orte_iof_base_framework);
    (void) mca_base_framework_close(&orte_rtc_base_framework);
    (void) mca_base_framework_close(&orte_odls_base_framework);
    (void) mca_base_framework_close(&orte_rmaps_base_framework);
    (void) mca_base_framework_close(&orte_ras_base_framework);
    (void) mca_base_framework_close(&orte_grpcomm_base_framework);
    (void) mca_base_framework_close(&orte_routed_base_framework);
    (void) mca_base_framework_close(&orte_plm_base_framework);
    /* first stage shutdown of the errmgr, deregister the handler but keep
     * the required facilities until the rml and oob are offline */
    orte_errmgr.finalize();

    /* cleanup the pstat stuff */
    (void) mca_base_framework_close(&opal_pstat_base_framework);

    /* remove my contact info file, if we have session directories */
    if (NULL != orte_process_info.jobfam_session_dir) {
        /* NOTE(review): opal_os_path return is not NULL-checked here;
         * presumably it only fails on OOM — confirm before relying on it */
        contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir,
                                    "contact.txt", NULL);
        unlink(contact_path);
        free(contact_path);
    }

    /* shutdown the messaging frameworks */
    (void) mca_base_framework_close(&orte_rml_base_framework);
    (void) mca_base_framework_close(&orte_oob_base_framework);
    (void) mca_base_framework_close(&orte_errmgr_base_framework);
    (void) mca_base_framework_close(&orte_state_base_framework);

    /* remove our use of the session directory tree */
    orte_session_dir_finalize(ORTE_PROC_MY_NAME);
    /* ensure we scrub the session directory tree */
    orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);

    /* close the xml output file, if open */
    if (orte_xml_output) {
        /* emit the closing tag to keep the XML document well-formed */
        fprintf(orte_xml_fp, "</mpirun>\n");
        fflush(orte_xml_fp);
        if (stdout != orte_xml_fp) {
            fclose(orte_xml_fp);
        }
    }

    /* release the job hash table */
    OPAL_HASH_TABLE_FOREACH(key, uint32, jdata, orte_job_data) {
        if (NULL != jdata) {
            OBJ_RELEASE(jdata);
        }
    }
    OBJ_RELEASE(orte_job_data);

    if (NULL != orte_process_info.super.proc_hostname) {
        free(orte_process_info.super.proc_hostname);
    }
    /* --do-not-launch runs have nothing further to clean up; bail out now */
    if (orte_do_not_launch) {
        exit(0);
    }

/* manually drain the topology array: each entry's hwloc topology pointer is
 * cleared before release (presumably it is owned elsewhere — confirm), then
 * the array bookkeeping is reset so the subsequent OBJ_RELEASE of the array
 * does not try to release the entries again */
{
    opal_pointer_array_t * array = orte_node_topologies;
    int i;
    if( array->number_free != array->size ) {
        OPAL_THREAD_LOCK(&array->lock);
        array->lowest_free = 0;
        array->number_free = array->size;
        for(i=0; i<array->size; i++) {
            if(NULL != array->addr[i]) {
                orte_topology_t * topo = (orte_topology_t *)array->addr[i];
                topo->topo = NULL;
                OBJ_RELEASE(topo);
            }
            array->addr[i] = NULL;
        }
        OPAL_THREAD_UNLOCK(&array->lock);
    }
}
    OBJ_RELEASE(orte_node_topologies);

/* manually drain the node pool the same way; node 0 is the HNP's own node,
 * whose daemon object is released explicitly first */
{
    opal_pointer_array_t * array = orte_node_pool;
    int i;
    orte_node_t* node = (orte_node_t *)opal_pointer_array_get_item(orte_node_pool, 0);
    assert(NULL != node);
    OBJ_RELEASE(node->daemon);
    node->daemon = NULL;
    if( array->number_free != array->size ) {
        OPAL_THREAD_LOCK(&array->lock);
        array->lowest_free = 0;
        array->number_free = array->size;
        for(i=0; i<array->size; i++) {
            if(NULL != array->addr[i]) {
                node= (orte_node_t*)array->addr[i];
                OBJ_RELEASE(node);
            }
            array->addr[i] = NULL;
        }
        OPAL_THREAD_UNLOCK(&array->lock);
    }
}
    OBJ_RELEASE(orte_node_pool);

    free(orte_topo_signature);

    return ORTE_SUCCESS;
}
 862 
 863 static void rte_abort(int status, bool report)
 864 {
 865     /* do NOT do a normal finalize as this will very likely
 866      * hang the process. We are aborting due to an abnormal condition
 867      * that precludes normal cleanup
 868      *
 869      * We do need to do the following bits to make sure we leave a
 870      * clean environment. Taken from orte_finalize():
 871      * - Assume errmgr cleans up child processes before we exit.
 872      */
 873 
 874     /* ensure we scrub the session directory tree */
 875     orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 876     /* - Clean out the global structures
 877      * (not really necessary, but good practice)
 878      */
 879     orte_proc_info_finalize();
 880     /* just exit */
 881     exit(status);
 882 }
 883 
 884 static void clean_abort(int fd, short flags, void *arg)
 885 {
 886     /* if we have already ordered this once, don't keep
 887      * doing it to avoid race conditions
 888      */
 889     if (opal_atomic_trylock(&orte_abort_inprogress_lock)) { /* returns 1 if already locked */
 890         if (forcibly_die) {
 891             /* kill any local procs */
 892             orte_odls.kill_local_procs(NULL);
 893             /* whack any lingering session directory files from our jobs */
 894             orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
 895             /* cleanup our pmix server */
 896             opal_pmix.finalize();
 897             /* exit with a non-zero status */
 898             exit(ORTE_ERROR_DEFAULT_EXIT_CODE);
 899         }
 900         fprintf(stderr, "%s: abort is already in progress...hit ctrl-c again to forcibly terminate\n\n", orte_basename);
 901         forcibly_die = true;
 902         /* reset the event */
 903         opal_event_add(&term_handler, NULL);
 904         return;
 905     }
 906     /* ensure we exit with a non-zero status */
 907     ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
 908 
 909     /* ensure that the forwarding of stdin stops */
 910     orte_job_term_ordered = true;
 911     /* tell us to be quiet - hey, the user killed us with a ctrl-c,
 912      * so need to tell them that!
 913      */
 914     orte_execute_quiet = true;
 915     /* We are in an event handler; the job completed procedure
 916        will delete the signal handler that is currently running
 917        (which is a Bad Thing), so we can't call it directly.
 918        Instead, we have to exit this handler and setup to call
 919        job_completed() after this. */
 920     orte_plm.terminate_orteds();;
 921 }
 922 
 923 static struct timeval current, last={0,0};
 924 static bool first = true;
 925 
 926 /*
 927  * Attempt to terminate the job and wait for callback indicating
 928  * the job has been aborted.
 929  */
 930 static void abort_signal_callback(int fd)
 931 {
 932     uint8_t foo = 1;
 933     char *msg = "Abort is in progress...hit ctrl-c again within 5 seconds to forcibly terminate\n\n";
 934 
 935     /* if this is the first time thru, just get
 936      * the current time
 937      */
 938     if (first) {
 939         first = false;
 940         gettimeofday(&current, NULL);
 941     } else {
 942         /* get the current time */
 943         gettimeofday(&current, NULL);
 944         /* if this is within 5 seconds of the
 945          * last time we were called, then just
 946          * exit - we are probably stuck
 947          */
 948         if ((current.tv_sec - last.tv_sec) < 5) {
 949             exit(1);
 950         }
 951         write(1, (void*)msg, strlen(msg));
 952     }
 953     /* save the time */
 954     last.tv_sec = current.tv_sec;
 955     /* tell the event lib to attempt to abnormally terminate */
 956     write(term_pipe[1], &foo, 1);
 957 }
 958 
 959 /**
 960  * Deal with sigpipe errors
 961  */
 962 static int sigpipe_error_count=0;
 963 static void epipe_signal_callback(int fd, short flags, void *arg)
 964 {
 965     sigpipe_error_count++;
 966 
 967     if (10 < sigpipe_error_count) {
 968         /* time to abort */
 969         opal_output(0, "%s: SIGPIPE detected on fd %d - aborting", orte_basename, fd);
 970         clean_abort(0, 0, NULL);
 971     }
 972 
 973     return;
 974 }
 975 
 976 /**
 977  * Pass user signals to the remote application processes
 978  */
 979 static void  signal_forward_callback(int fd, short event, void *arg)
 980 {
 981     opal_event_t *signal = (opal_event_t*)arg;
 982     int signum, ret;
 983 
 984     signum = OPAL_EVENT_SIGNAL(signal);
 985     if (!orte_execute_quiet){
 986         fprintf(stderr, "%s: Forwarding signal %d to job\n",
 987                 orte_basename, signum);
 988     }
 989 
 990     /** send the signal out to the processes, including any descendants */
 991     if (ORTE_SUCCESS != (ret = orte_plm.signal_job(ORTE_JOBID_WILDCARD, signum))) {
 992         fprintf(stderr, "Signal %d could not be sent to the job (returned %d)",
 993                 signum, ret);
 994     }
 995 }

/* [<][>][^][v][top][bottom][index][help] */