root/orte/mca/plm/base/plm_base_launch_support.c


DEFINITIONS

This source file includes the following definitions:
  1. orte_plm_base_set_slots
  2. orte_plm_base_daemons_reported
  3. orte_plm_base_allocation_complete
  4. orte_plm_base_daemons_launched
  5. files_ready
  6. orte_plm_base_vm_ready
  7. orte_plm_base_mapping_complete
  8. orte_plm_base_setup_job
  9. orte_plm_base_setup_job_complete
  10. orte_plm_base_complete_setup
  11. timer_cb
  12. orte_plm_base_launch_apps
  13. orte_plm_base_send_launch_msg
  14. orte_plm_base_post_launch
  15. orte_plm_base_registered
  16. orte_plm_base_daemon_topology
  17. orte_plm_base_daemon_callback
  18. orte_plm_base_daemon_failed
  19. orte_plm_base_setup_orted_cmd
  20. orte_plm_base_orted_append_basic_args
  21. orte_plm_base_setup_virtual_machine

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2007-2017 Cisco Systems, Inc.  All rights reserved.
  13  * Copyright (c) 2009      Institut National de Recherche en Informatique
  14  *                         et Automatique. All rights reserved.
  15  * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
  16  * Copyright (c) 2013-2019 Intel, Inc.  All rights reserved.
  17  * Copyright (c) 2014-2018 Research Organization for Information Science
  18  *                         and Technology (RIST).  All rights reserved.
  19  * Copyright (c) 2016      IBM Corporation.  All rights reserved.
  20  * $COPYRIGHT$
  21  *
  22  * Additional copyrights may follow
  23  *
  24  * $HEADER$
  25  *
  26  */
  27 
  28 #include "orte_config.h"
  29 #include "orte/constants.h"
  30 
  31 #ifdef HAVE_SYS_WAIT_H
  32 #include <sys/wait.h>
  33 #endif
  34 #ifdef HAVE_SYS_TIME_H
  35 #include <sys/time.h>
  36 #endif  /* HAVE_SYS_TIME_H */
  37 #include <ctype.h>
  38 
  39 #include "opal/hash_string.h"
  40 #include "opal/util/argv.h"
  41 #include "opal/util/opal_environ.h"
  42 #include "opal/util/printf.h"
  43 #include "opal/class/opal_pointer_array.h"
  44 #include "opal/dss/dss.h"
  45 #include "opal/mca/hwloc/hwloc-internal.h"
  46 #include "opal/mca/pmix/pmix.h"
  47 #include "opal/mca/compress/compress.h"
  48 
  49 #include "orte/util/dash_host/dash_host.h"
  50 #include "orte/util/nidmap.h"
  51 #include "orte/util/session_dir.h"
  52 #include "orte/util/show_help.h"
  53 #include "orte/mca/errmgr/errmgr.h"
  54 #include "orte/mca/ess/ess.h"
  55 #include "orte/mca/iof/base/base.h"
  56 #include "orte/mca/odls/base/base.h"
  57 #include "orte/mca/ras/base/base.h"
  58 #include "orte/mca/rmaps/rmaps.h"
  59 #include "orte/mca/rmaps/base/base.h"
  60 #include "orte/mca/rml/rml.h"
  61 #include "orte/mca/rml/rml_types.h"
  62 #include "orte/mca/routed/routed.h"
  63 #include "orte/mca/grpcomm/base/base.h"
  64 #if OPAL_ENABLE_FT_CR == 1
  65 #include "orte/mca/snapc/base/base.h"
  66 #endif
  67 #include "orte/mca/filem/filem.h"
  68 #include "orte/mca/filem/base/base.h"
  70 #include "orte/mca/rml/base/rml_contact.h"
  71 #include "orte/mca/rtc/rtc.h"
  72 #include "orte/runtime/orte_globals.h"
  73 #include "orte/runtime/runtime.h"
  74 #include "orte/runtime/orte_locks.h"
  75 #include "orte/runtime/orte_quit.h"
  76 #include "orte/util/name_fns.h"
  77 #include "orte/util/pre_condition_transports.h"
  78 #include "orte/util/proc_info.h"
  79 #include "orte/util/threads.h"
  80 #include "orte/mca/state/state.h"
  81 #include "orte/mca/state/base/base.h"
  82 #include "orte/util/hostfile/hostfile.h"
  83 #include "orte/mca/odls/odls_types.h"
  84 
  85 #include "orte/mca/plm/base/plm_private.h"
  86 #include "orte/mca/plm/base/base.h"
  87 
  88 void orte_plm_base_set_slots(orte_node_t *node)
  89 {
  90     if (0 == strncmp(orte_set_slots, "cores", strlen(orte_set_slots))) {
  91         if (NULL != node->topology && NULL != node->topology->topo) {
  92             node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
  93                                                              HWLOC_OBJ_CORE, 0,
  94                                                              OPAL_HWLOC_LOGICAL);
  95         }
  96     } else if (0 == strncmp(orte_set_slots, "sockets", strlen(orte_set_slots))) {
  97         if (NULL != node->topology && NULL != node->topology->topo) {
  98             if (0 == (node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
  99                                                                        HWLOC_OBJ_SOCKET, 0,
 100                                                                        OPAL_HWLOC_LOGICAL))) {
 101                 /* some systems don't report sockets - in this case,
 102                  * use numanodes */
 103                 node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
 104                                                                  HWLOC_OBJ_NODE, 0,
 105                                                                  OPAL_HWLOC_LOGICAL);
 106             }
 107         }
 108     } else if (0 == strncmp(orte_set_slots, "numas", strlen(orte_set_slots))) {
 109         if (NULL != node->topology && NULL != node->topology->topo) {
 110             node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
 111                                                              HWLOC_OBJ_NODE, 0,
 112                                                              OPAL_HWLOC_LOGICAL);
 113         }
 114     } else if (0 == strncmp(orte_set_slots, "hwthreads", strlen(orte_set_slots))) {
 115         if (NULL != node->topology && NULL != node->topology->topo) {
 116             node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
 117                                                              HWLOC_OBJ_PU, 0,
 118                                                              OPAL_HWLOC_LOGICAL);
 119         }
 120     } else {
 121         /* must be a number */
 122         node->slots = strtol(orte_set_slots, NULL, 10);
 123     }
 124     /* mark the node as having its slots "given" */
 125     ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
 126 }
 127 
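The slot directive handled above ultimately reduces to counting hwloc objects of a given type. As a minimal standalone sketch (not part of ORTE; it assumes only a plain hwloc install and uses the hwloc 1.x-era constants this file uses), the same counting against the raw API that opal_hwloc_base_get_nbobjs_by_type() wraps looks like:

    /* count cores/hwthreads/sockets roughly the way set_slots does */
    #include <stdio.h>
    #include <hwloc.h>

    int main(void)
    {
        hwloc_topology_t topo;
        hwloc_topology_init(&topo);
        hwloc_topology_load(topo);          /* discover the local machine */

        int cores   = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE);
        int pus     = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
        int sockets = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET);
        if (0 == sockets) {
            /* mirror the fallback above: some systems don't report
             * sockets, so count NUMA nodes instead */
            sockets = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_NODE);
        }
        printf("cores=%d hwthreads=%d sockets=%d\n", cores, pus, sockets);
        hwloc_topology_destroy(topo);
        return 0;
    }
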
 128 void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
 129 {
 130     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 131     orte_topology_t *t;
 132     orte_node_t *node;
 133     int i, rc;
 134     uint8_t u8;
 135     opal_buffer_t buf;
 136     orte_grpcomm_signature_t *sig;
 137     orte_daemon_cmd_flag_t command = ORTE_DAEMON_PASS_NODE_INFO_CMD;
 138 
 139     ORTE_ACQUIRE_OBJECT(caddy);
 140 
 141     /* if we are not launching, then we just assume that all
 142      * daemons share our topology */
 143     if (orte_do_not_launch) {
 144         node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
 145         t = node->topology;
 146         for (i=1; i < orte_node_pool->size; i++) {
 147             if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
 148                 continue;
 149             }
 150             if (NULL == node->topology) {
 151                 node->topology = t;
 152             }
 153         }
 154     }
 155 
 156     /* if this is an unmanaged allocation, then set the
 157      * slots on each node as directed, or to the default
 158      */
 159     if (!orte_managed_allocation) {
 160         if (NULL != orte_set_slots &&
 161             0 != strncmp(orte_set_slots, "none", strlen(orte_set_slots))) {
 162             caddy->jdata->total_slots_alloc = 0;
 163             for (i=0; i < orte_node_pool->size; i++) {
 164                 if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
 165                     continue;
 166                 }
 167                 if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
 168                     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 169                                          "%s plm:base:setting slots for node %s by %s",
 170                                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, orte_set_slots));
 171                     orte_plm_base_set_slots(node);
 172                 }
 173                 caddy->jdata->total_slots_alloc += node->slots;
 174             }
 175         }
 176     }
 177 
 178     if (orte_display_allocation) {
 179         orte_ras_base_display_alloc();
 180     }
 181     /* ensure we update the routing plan */
 182     orte_routed.update_routing_plan();
 183 
 184     /* prep the buffer */
 185     OBJ_CONSTRUCT(&buf, opal_buffer_t);
 186     /* load the command */
 187     if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, ORTE_DAEMON_CMD))) {
 188         ORTE_ERROR_LOG(rc);
 189         OBJ_DESTRUCT(&buf);
 190         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 191         OBJ_RELEASE(caddy);
 192         return;
 193     }
 194 
 195 
 196     /* if we did not execute a tree-spawn, then the daemons do
 197      * not currently have a nidmap for the job - in that case,
 198      * send one to them */
 199     if (!orte_nidmap_communicated) {
 200         u8 = 1;
 201         if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) {
 202             ORTE_ERROR_LOG(rc);
 203             OBJ_DESTRUCT(&buf);
 204             ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 205             OBJ_RELEASE(caddy);
 206             return;
 207         }
 208         if (OPAL_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &buf))) {
 209             ORTE_ERROR_LOG(rc);
 210             OBJ_DESTRUCT(&buf);
 211             ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 212             OBJ_RELEASE(caddy);
 213             return;
 214         }
 215         orte_nidmap_communicated = true;
 216     } else {
 217         u8 = 0;
 218         if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) {
 219             ORTE_ERROR_LOG(rc);
 220             OBJ_DESTRUCT(&buf);
 221             ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 222             OBJ_RELEASE(caddy);
 223             return;
 224         }
 225     }
 226 
 227     /* we always send the topologies and the #slots on each node. Note
 228      * that we cannot send the #slots until after the above step since,
 229      * for unmanaged allocations, we might have just determined it! */
 230     if (OPAL_SUCCESS != (rc = orte_util_pass_node_info(&buf))) {
 231         ORTE_ERROR_LOG(rc);
 232         OBJ_DESTRUCT(&buf);
 233         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 234         OBJ_RELEASE(caddy);
 235         return;
 236     }
 237 
 238     /* goes to all daemons */
 239     sig = OBJ_NEW(orte_grpcomm_signature_t);
 240     sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
 241     sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
 242     sig->signature[0].vpid = ORTE_VPID_WILDCARD;
 243     sig->sz = 1;
 244     if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, &buf))) {
 245         ORTE_ERROR_LOG(rc);
 246         OBJ_RELEASE(sig);
 247         OBJ_DESTRUCT(&buf);
 248         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 249         OBJ_RELEASE(caddy);
 250         return;
 251     }
 252     OBJ_DESTRUCT(&buf);
 253     /* maintain accounting */
 254     OBJ_RELEASE(sig);
 255 
 256     /* progress the job */
 257     caddy->jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
 258     ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_VM_READY);
 259 
 260     /* cleanup */
 261     OBJ_RELEASE(caddy);
 262 }
 263 
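The construct/pack/xcast sequence above is this file's standard recipe for pushing a command to every daemon: pack the command into a buffer, build a signature addressing the wildcard vpid of our own jobid, and xcast it. A condensed sketch of just that skeleton, reusing only calls that already appear above (the helper name is hypothetical and error logging is elided):

    /* hypothetical helper: broadcast one daemon command to all daemons */
    static int broadcast_daemon_cmd(orte_daemon_cmd_flag_t command)
    {
        opal_buffer_t buf;
        orte_grpcomm_signature_t *sig;
        int rc;

        OBJ_CONSTRUCT(&buf, opal_buffer_t);
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, ORTE_DAEMON_CMD))) {
            OBJ_DESTRUCT(&buf);
            return rc;
        }
        /* wildcard vpid within our own jobid means "every daemon" */
        sig = OBJ_NEW(orte_grpcomm_signature_t);
        sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
        sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
        sig->signature[0].vpid = ORTE_VPID_WILDCARD;
        sig->sz = 1;
        rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, &buf);
        OBJ_RELEASE(sig);
        OBJ_DESTRUCT(&buf);
        return rc;
    }
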
 264 void orte_plm_base_allocation_complete(int fd, short args, void *cbdata)
 265 {
 266     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 267 
 268     ORTE_ACQUIRE_OBJECT(caddy);
 269 
 270     /* if we don't want to launch, then we at least want
 271      * to map so we can see where the procs would have
 272      * gone - so skip to the mapping state */
 273     if (orte_do_not_launch) {
 274         caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
 275         ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_MAP);
 276     } else {
 277         /* move the state machine along */
 278         caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
 279         ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
 280     }
 281 
 282     /* cleanup */
 283     OBJ_RELEASE(caddy);
 284 }
 285 
 286 void orte_plm_base_daemons_launched(int fd, short args, void *cbdata)
 287 {
 288     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 289 
 290     ORTE_ACQUIRE_OBJECT(caddy);
 291 
 292     /* do NOT increment the state - we wait for the
 293      * daemons to report that they have actually
 294      * started before moving to the right state
 295      */
 296     /* cleanup */
 297     OBJ_RELEASE(caddy);
 298 }
 299 
 300 static void files_ready(int status, void *cbdata)
 301 {
 302     orte_job_t *jdata = (orte_job_t*)cbdata;
 303 
 304     if (ORTE_SUCCESS != status) {
 305         ORTE_FORCED_TERMINATE(status);
 306     } else {
 307         ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
 308     }
 309 }
 310 
 311 void orte_plm_base_vm_ready(int fd, short args, void *cbdata)
 312 {
 313     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 314 
 315     ORTE_ACQUIRE_OBJECT(caddy);
 316 
 317     /* progress the job */
 318     caddy->jdata->state = ORTE_JOB_STATE_VM_READY;
 319 
 320     /* position any required files */
 321     if (ORTE_SUCCESS != orte_filem.preposition_files(caddy->jdata, files_ready, caddy->jdata)) {
 322         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 323     }
 324 
 325     /* cleanup */
 326     OBJ_RELEASE(caddy);
 327 }
 328 
 329 void orte_plm_base_mapping_complete(int fd, short args, void *cbdata)
 330 {
 331     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 332 
 333     ORTE_ACQUIRE_OBJECT(caddy);
 334 
 335     /* move the state machine along */
 336     caddy->jdata->state = ORTE_JOB_STATE_MAP_COMPLETE;
 337     ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_SYSTEM_PREP);
 338 
 339     /* cleanup */
 340     OBJ_RELEASE(caddy);
 341 }
 342 
 343 
 344 void orte_plm_base_setup_job(int fd, short args, void *cbdata)
 345 {
 346     int rc;
 347     int i;
 348     orte_app_context_t *app;
 349     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 350     char *key;
 351     orte_job_t *parent;
 352     orte_process_name_t name, *nptr;
 353 
 354     ORTE_ACQUIRE_OBJECT(caddy);
 355 
 356     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 357                          "%s plm:base:setup_job",
 358                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 359 
 360     if (ORTE_JOB_STATE_INIT != caddy->job_state) {
 361         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 362         OBJ_RELEASE(caddy);
 363         return;
 364     }
 365     /* update job state */
 366     caddy->jdata->state = caddy->job_state;
 367 
 368     /* start by getting a jobid */
 369     if (ORTE_JOBID_INVALID == caddy->jdata->jobid) {
 370         if (ORTE_SUCCESS != (rc = orte_plm_base_create_jobid(caddy->jdata))) {
 371             ORTE_ERROR_LOG(rc);
 372             ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 373             OBJ_RELEASE(caddy);
 374             return;
 375         }
 376 
 377         /* store it on the global job data pool - this is the key
 378          * step required before we launch the daemons. It allows
 379          * the orte_rmaps_base_setup_virtual_machine routine to
 380          * search all apps for any hosts to be used by the vm
 381          */
 382         opal_hash_table_set_value_uint32(orte_job_data, caddy->jdata->jobid, caddy->jdata);
 383     }
 384 
 385     /* if the job didn't request recovery but it is globally enabled, mark the job recoverable */
 386     if (!ORTE_FLAG_TEST(caddy->jdata, ORTE_JOB_FLAG_RECOVERABLE) &&
 387         orte_enable_recovery) {
 388         ORTE_FLAG_SET(caddy->jdata, ORTE_JOB_FLAG_RECOVERABLE);
 389     }
 390 
 391     /* setup transport keys in case the MPI layer needs them. If
 392      * this is a dynamic spawn, then use the same keys as the
 393      * parent process had so the new/old procs can communicate.
 394      * Otherwise we can use the jobfam and stepid as unique keys
 395      * because they are unique values assigned by the RM
 396      */
 397      nptr = &name;
 398      if (orte_get_attribute(&caddy->jdata->attributes, ORTE_JOB_LAUNCH_PROXY, (void**)&nptr, OPAL_NAME)) {
 399         /* get the parent jdata */
 400         if (NULL == (parent = orte_get_job_data_object(name.jobid))) {
 401             ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 402             ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 403             OBJ_RELEASE(caddy);
 404             return;
 405         }
 406         /* a tool might be the parent calling spawn, so we cannot require
 407          * that a job transport key has been assigned to it */
 408         key = NULL;
 409         if (orte_get_attribute(&parent->attributes, ORTE_JOB_TRANSPORT_KEY, (void**)&key, OPAL_STRING) &&
 410             NULL != key) {
 411             /* record it */
 412             orte_set_attribute(&caddy->jdata->attributes, ORTE_JOB_TRANSPORT_KEY, ORTE_ATTR_LOCAL, key, OPAL_STRING);
 413             /* add the transport key envar to each app */
 414             for (i=0; i < caddy->jdata->apps->size; i++) {
 415                 if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(caddy->jdata->apps, i))) {
 416                     continue;
 417                 }
 418                 opal_setenv(OPAL_MCA_PREFIX"orte_precondition_transports", key, true, &app->env);
 419             }
 420             free(key);
 421         } else {
 422             if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata, NULL))) {
 423                 ORTE_ERROR_LOG(rc);
 424                 ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 425                 OBJ_RELEASE(caddy);
 426                 return;
 427             }
 428         }
 429     } else {
 430         /* this will also record the transport key attribute in the job object, and
 431          * adds the key envar to each app */
 432         if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata, NULL))) {
 433             ORTE_ERROR_LOG(rc);
 434             ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 435             OBJ_RELEASE(caddy);
 436             return;
 437         }
 438     }
 439 
 440     /* if app recovery is not defined, set apps to defaults */
 441     for (i=0; i < caddy->jdata->apps->size; i++) {
 442         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(caddy->jdata->apps, i))) {
 443             continue;
 444         }
 445         if (!orte_get_attribute(&app->attributes, ORTE_APP_RECOV_DEF, NULL, OPAL_BOOL)) {
 446             orte_set_attribute(&app->attributes, ORTE_APP_MAX_RESTARTS, ORTE_ATTR_LOCAL, &orte_max_restarts, OPAL_INT32);
 447         }
 448     }
 449 
 450     /* set the job state to the next position */
 451     ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_INIT_COMPLETE);
 452 
 453     /* cleanup */
 454     OBJ_RELEASE(caddy);
 455 }
 456 
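For reference, the transport key recorded above is conventionally a pair of 64-bit values rendered as hex and joined by a dash - that exact format is an assumption here, not something this file defines. A standalone sketch that fabricates such a key (illustrative randomness only, not crypto quality):

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <inttypes.h>

    int main(void)
    {
        uint64_t hi = ((uint64_t)rand() << 32) | (uint32_t)rand();
        uint64_t lo = ((uint64_t)rand() << 32) | (uint32_t)rand();
        /* this is the envar each app receives via opal_setenv() above */
        printf("OMPI_MCA_orte_precondition_transports=%016" PRIx64 "-%016" PRIx64 "\n",
               hi, lo);
        return 0;
    }
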
 457 void orte_plm_base_setup_job_complete(int fd, short args, void *cbdata)
 458 {
 459     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 460 
 461     ORTE_ACQUIRE_OBJECT(caddy);
 462 
 463     /* nothing to do here but move along */
 464     ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_ALLOCATE);
 465     OBJ_RELEASE(caddy);
 466 }
 467 
 468 void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
 469 {
 470     orte_job_t *jdata, *jdatorted;
 471     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 472     orte_node_t *node;
 473     uint32_t h;
 474     orte_vpid_t *vptr;
 475     int i, rc;
 476     char *serial_number;
 477     orte_process_name_t requestor, *rptr;
 478 
 479     ORTE_ACQUIRE_OBJECT(caddy);
 480 
 481     opal_output_verbose(5, orte_plm_base_framework.framework_output,
 482                         "%s complete_setup on job %s",
 483                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 484                         ORTE_JOBID_PRINT(caddy->jdata->jobid));
 485 
 486     /* bozo check */
 487     if (ORTE_JOB_STATE_SYSTEM_PREP != caddy->job_state) {
 488         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 489         OBJ_RELEASE(caddy);
 490         return;
 491     }
 492     /* update job state */
 493     caddy->jdata->state = caddy->job_state;
 494 
 495     /* get the orted job data object */
 496     if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
 497         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 498         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 499         OBJ_RELEASE(caddy);
 500         return;
 501     }
 502 
 503     /* convenience */
 504     jdata = caddy->jdata;
 505 
 506     /* If this job is being started by me, then there is nothing
 507      * further we need to do as any user directives (e.g., to tie
 508      * off IO to /dev/null) will have been included in the launch
 509      * message and the IOF knows how to handle any default situation.
 510      * However, if this is a proxy spawn request, then the spawner
 511      * might be a tool that wants IO forwarded to it. If that's the
 512      * situation, then the job object will contain an attribute
 513      * indicating that request */
 514     if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FWDIO_TO_TOOL, NULL, OPAL_BOOL)) {
 515         /* send a message to our IOF containing the requested pull */
 516         rptr = &requestor;
 517         if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCH_PROXY, (void**)&rptr, OPAL_NAME)) {
 518             ORTE_IOF_PROXY_PULL(jdata, rptr);
 519         } else {
 520             ORTE_IOF_PROXY_PULL(jdata, &jdata->originator);
 521         }
 522         /* the tool will PUSH its stdin, so nothing we need to do here
 523          * about stdin */
 524     }
 525 
 526     /* if coprocessors were detected, now is the time to
 527      * identify who is attached to what host - this info
 528      * will be shipped to the daemons in the nidmap. Someday,
 529      * there may be a direct way for daemons on coprocessors
 530      * to detect their hosts - but not today.
 531      */
 532     if (orte_coprocessors_detected) {
 533         /* cycle thru the nodes looking for coprocessors */
 534         for (i=0; i < orte_node_pool->size; i++) {
 535             if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
 536                 continue;
 537             }
 538             /* if we don't have a serial number, then we are not a coprocessor */
 539             serial_number = NULL;
 540             if (!orte_get_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, (void**)&serial_number, OPAL_STRING)) {
 541                 continue;
 542             }
 543             if (NULL != serial_number) {
 544                 /* if we have a serial number, then we are a coprocessor - so
 545                  * compute our hash and lookup our hostid
 546                  */
 547                 OPAL_HASH_STR(serial_number, h);
 548                 free(serial_number);
 549                 if (OPAL_SUCCESS != (rc = opal_hash_table_get_value_uint32(orte_coprocessors, h,
 550                                                                            (void**)&vptr))) {
 551                     ORTE_ERROR_LOG(rc);
 552                     break;
 553                 }
 554                 orte_set_attribute(&node->attributes, ORTE_NODE_HOSTID, ORTE_ATTR_LOCAL, vptr, ORTE_VPID);
 555             }
 556         }
 557     }
 558     /* done with the coprocessor mapping at this time */
 559     if (NULL != orte_coprocessors) {
 560         OBJ_RELEASE(orte_coprocessors);
 561     }
 562 
 563     /* set the job state to the next position */
 564     ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_LAUNCH_APPS);
 565 
 566     /* cleanup */
 567     OBJ_RELEASE(caddy);
 568 }
 569 
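The coprocessor registry consulted above is an opal_hash_table_t keyed by a hash of the device serial number; the daemon callbacks later in this file populate it with pointers to the hosting daemon's vpid. A hedged sketch of the lookup side (hypothetical helper, made-up serial number):

    /* look up which daemon hosts the coprocessor with this serial number;
     * note the table stores a POINTER to the vpid, so the pointed-at
     * storage must outlive the table - the real code stores
     * &daemon->name.vpid, which lives as long as the daemon object */
    static orte_vpid_t lookup_coprocessor_host(const char *serial)
    {
        uint32_t h;
        orte_vpid_t *vptr;

        OPAL_HASH_STR(serial, h);   /* same hash used on the storing side */
        if (OPAL_SUCCESS != opal_hash_table_get_value_uint32(orte_coprocessors,
                                                             h, (void**)&vptr)) {
            return ORTE_VPID_INVALID;
        }
        return *vptr;
    }
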
 570 /* launch timeout expired - declare the launch failed */
 571 static void timer_cb(int fd, short event, void *cbdata)
 572 {
 573     orte_job_t *jdata = (orte_job_t*)cbdata;
 574     orte_timer_t *timer=NULL;
 575 
 576     ORTE_ACQUIRE_OBJECT(jdata);
 577 
 578     /* declare launch failed */
 579     ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_START);
 580 
 581     /* free event */
 582     if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FAILURE_TIMER_EVENT, (void**)&timer, OPAL_PTR)) {
 583         /* timer is an orte_timer_t object */
 584         OBJ_RELEASE(timer);
 585         orte_remove_attribute(&jdata->attributes, ORTE_JOB_FAILURE_TIMER_EVENT);
 586     }
 587 }
 588 
 589 void orte_plm_base_launch_apps(int fd, short args, void *cbdata)
 590 {
 591     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 592     orte_job_t *jdata;
 593     orte_daemon_cmd_flag_t command;
 594     int rc;
 595 
 596     ORTE_ACQUIRE_OBJECT(caddy);
 597 
 598     /* convenience */
 599     jdata = caddy->jdata;
 600 
 601     if (ORTE_JOB_STATE_LAUNCH_APPS != caddy->job_state) {
 602         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 603         OBJ_RELEASE(caddy);
 604         return;
 605     }
 606     /* update job state */
 607     caddy->jdata->state = caddy->job_state;
 608 
 609     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 610                          "%s plm:base:launch_apps for job %s",
 611                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 612                          ORTE_JOBID_PRINT(jdata->jobid)));
 613 
 614     /* pack the appropriate add_local_procs command */
 615     if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FIXED_DVM, NULL, OPAL_BOOL)) {
 616         command = ORTE_DAEMON_DVM_ADD_PROCS;
 617     } else {
 618         command = ORTE_DAEMON_ADD_LOCAL_PROCS;
 619     }
 620     if (ORTE_SUCCESS != (rc = opal_dss.pack(&jdata->launch_msg, &command, 1, ORTE_DAEMON_CMD))) {
 621         ORTE_ERROR_LOG(rc);
 622         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 623         OBJ_RELEASE(caddy);
 624         return;
 625     }
 626 
 627     /* get the local launcher's required data */
 628     if (ORTE_SUCCESS != (rc = orte_odls.get_add_procs_data(&jdata->launch_msg, jdata->jobid))) {
 629         ORTE_ERROR_LOG(rc);
 630         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 631     }
 632 
 633     OBJ_RELEASE(caddy);
 634     return;
 635 }
 636 
 637 void orte_plm_base_send_launch_msg(int fd, short args, void *cbdata)
 638 {
 639     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 640     orte_timer_t *timer;
 641     orte_grpcomm_signature_t *sig;
 642     orte_job_t *jdata;
 643     int rc;
 644 
 645     /* convenience */
 646     jdata = caddy->jdata;
 647 
 648     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 649                          "%s plm:base:send launch msg for job %s",
 650                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 651                          ORTE_JOBID_PRINT(jdata->jobid)));
 652 
 653     /* if we don't want to launch the apps, now is the time to leave */
 654     if (orte_do_not_launch) {
 655         bool compressed;
 656         uint8_t *cmpdata;
 657         size_t cmplen;
 658         /* report the size of the launch message */
 659         compressed = opal_compress.compress_block((uint8_t*)jdata->launch_msg.base_ptr,
 660                                               jdata->launch_msg.bytes_used,
 661                                               &cmpdata, &cmplen);
 662         if (compressed) {
 663             opal_output(0, "LAUNCH MSG RAW SIZE: %d COMPRESSED SIZE: %d",
 664                         (int)jdata->launch_msg.bytes_used, (int)cmplen);
 665             free(cmpdata);
 666         } else {
 667             opal_output(0, "LAUNCH MSG RAW SIZE: %d", (int)jdata->launch_msg.bytes_used);
 668         }
 669         orte_never_launched = true;
 670         ORTE_FORCED_TERMINATE(0);
 671         OBJ_RELEASE(caddy);
 672         return;
 673     }
 674 
 675     /* goes to all daemons */
 676     sig = OBJ_NEW(orte_grpcomm_signature_t);
 677     sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
 678     sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
 679     sig->signature[0].vpid = ORTE_VPID_WILDCARD;
 680     sig->sz = 1;
 681     if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, &jdata->launch_msg))) {
 682         ORTE_ERROR_LOG(rc);
 683         OBJ_RELEASE(sig);
 684         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 685         OBJ_RELEASE(caddy);
 686         return;
 687     }
 688     OBJ_DESTRUCT(&jdata->launch_msg);
 689     OBJ_CONSTRUCT(&jdata->launch_msg, opal_buffer_t);
 690     /* maintain accounting */
 691     OBJ_RELEASE(sig);
 692 
 693     /* count ourselves as automatically having reported - used
 694      * only when reporting launch progress
 695      */
 696     caddy->jdata->num_daemons_reported++;
 697 
 698     /* if requested, setup a timer - if we don't launch within the
 699      * defined time, then we know things have failed
 700      */
 701     if (0 < orte_startup_timeout) {
 702         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 703                              "%s plm:base:launch defining timeout for job %s",
 704                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 705                              ORTE_JOBID_PRINT(jdata->jobid)));
 706         timer = OBJ_NEW(orte_timer_t);
 707         timer->payload = jdata;
 708         opal_event_evtimer_set(orte_event_base,
 709                                timer->ev, timer_cb, jdata);
 710         opal_event_set_priority(timer->ev, ORTE_ERROR_PRI);
 711         timer->tv.tv_sec = orte_startup_timeout;
 712         timer->tv.tv_usec = 0;
 713         orte_set_attribute(&jdata->attributes, ORTE_JOB_FAILURE_TIMER_EVENT, ORTE_ATTR_LOCAL, timer, OPAL_PTR);
 714         ORTE_POST_OBJECT(timer);
 715         opal_event_evtimer_add(timer->ev, &timer->tv);
 716     }
 717 
 718     /* cleanup */
 719     OBJ_RELEASE(caddy);
 720 }
 721 
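The failure timer armed above pairs with timer_cb() earlier in this file and with the cancellation in orte_plm_base_post_launch() below. A minimal standalone sketch of the same one-shot pattern against plain libevent2 - which ORTE's opal_event_* calls wrap - with all names illustrative:

    #include <stdio.h>
    #include <event2/event.h>

    static void launch_timeout_cb(evutil_socket_t fd, short events, void *arg)
    {
        /* in ORTE this is where timer_cb() declares FAILED_TO_START */
        printf("launch did not complete in time for %s\n", (const char*)arg);
    }

    int main(void)
    {
        struct event_base *base = event_base_new();
        struct event *ev = evtimer_new(base, launch_timeout_cb, "job 1");
        struct timeval tv = { 30, 0 };   /* cf. orte_startup_timeout */

        evtimer_add(ev, &tv);            /* arm the one-shot timer */
        /* calling evtimer_del(ev) before it fires is the equivalent of
         * the cancellation done when the job reaches RUNNING */
        event_base_dispatch(base);

        event_free(ev);
        event_base_free(base);
        return 0;
    }
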
 722 void orte_plm_base_post_launch(int fd, short args, void *cbdata)
 723 {
 724     int32_t rc;
 725     orte_job_t *jdata;
 726     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 727     orte_process_name_t name;
 728     orte_timer_t *timer=NULL;
 729     int ret;
 730     opal_buffer_t *answer;
 731     int room, *rmptr;
 732 
 733     ORTE_ACQUIRE_OBJECT(caddy);
 734 
 735     /* convenience */
 736     jdata = caddy->jdata;
 737 
 738     /* if a timer was defined, cancel it */
 739     if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FAILURE_TIMER_EVENT, (void**)&timer, OPAL_PTR)) {
 740         opal_event_evtimer_del(timer->ev);
 741         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 742                              "%s plm:base:launch deleting timeout for job %s",
 743                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 744                              ORTE_JOBID_PRINT(jdata->jobid)));
 745         OBJ_RELEASE(timer);
 746         orte_remove_attribute(&jdata->attributes, ORTE_JOB_FAILURE_TIMER_EVENT);
 747     }
 748 
 749     if (ORTE_JOB_STATE_RUNNING != caddy->job_state) {
 750         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 751         OBJ_RELEASE(caddy);
 752         return;
 753     }
 754     /* update job state */
 755     caddy->jdata->state = caddy->job_state;
 756 
 757     /* complete wiring up the iof */
 758     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 759                          "%s plm:base:launch wiring up iof for job %s",
 760                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 761                          ORTE_JOBID_PRINT(jdata->jobid)));
 762 
 763     /* push stdin - the IOF will know what to do with the specified target */
 764     name.jobid = jdata->jobid;
 765     name.vpid = jdata->stdin_target;
 766 
 767     if (ORTE_SUCCESS != (rc = orte_iof.push(&name, ORTE_IOF_STDIN, 0))) {
 768         ORTE_ERROR_LOG(rc);
 769         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 770         OBJ_RELEASE(caddy);
 771         return;
 772     }
 773 
 774     /* if this isn't a dynamic spawn, just cleanup */
 775     if (ORTE_JOBID_INVALID == jdata->originator.jobid) {
 776         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 777                              "%s plm:base:launch job %s is not a dynamic spawn",
 778                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 779                              ORTE_JOBID_PRINT(jdata->jobid)));
 780         goto cleanup;
 781     }
 782 
 783     /* prep the response */
 784     rc = ORTE_SUCCESS;
 785     answer = OBJ_NEW(opal_buffer_t);
 786     /* pack the status */
 787     if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &rc, 1, OPAL_INT32))) {
 788         ORTE_ERROR_LOG(ret);
 789         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 790         OBJ_RELEASE(caddy);
 791         return;
 792     }
 793     /* pack the jobid */
 794     if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &jdata->jobid, 1, ORTE_JOBID))) {
 795         ORTE_ERROR_LOG(ret);
 796         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 797         OBJ_RELEASE(caddy);
 798         return;
 799     }
 800     /* pack the room number */
 801     rmptr = &room;
 802     if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, (void**)&rmptr, OPAL_INT)) {
 803         if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &room, 1, OPAL_INT))) {
 804             ORTE_ERROR_LOG(ret);
 805             ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 806             OBJ_RELEASE(caddy);
 807             return;
 808         }
 809     }
 810     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 811                          "%s plm:base:launch sending dyn release of job %s to %s",
 812                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 813                          ORTE_JOBID_PRINT(jdata->jobid),
 814                          ORTE_NAME_PRINT(&jdata->originator)));
 815     if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer,
 816                                            ORTE_RML_TAG_LAUNCH_RESP,
 817                                            orte_rml_send_callback, NULL))) {
 818         ORTE_ERROR_LOG(ret);
 819         OBJ_RELEASE(answer);
 820         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 821         OBJ_RELEASE(caddy);
 822         return;
 823     }
 824 
 825   cleanup:
 826     /* cleanup */
 827     OBJ_RELEASE(caddy);
 828 }
 829 
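The dynamic-spawn response assembled above is packed in a fixed order: an int32 status, the new jobid, and - only when ORTE_JOB_ROOM_NUM was set on the job - the requestor's room number. A hedged sketch of the matching receive side (a hypothetical callback that simply mirrors that pack order):

    /* hypothetical handler for ORTE_RML_TAG_LAUNCH_RESP on the requestor */
    static void launch_resp_recv(int status, orte_process_name_t *sender,
                                 opal_buffer_t *buffer, orte_rml_tag_t tag,
                                 void *cbdata)
    {
        int32_t ret;
        orte_jobid_t jobid;
        int room, cnt;

        cnt = 1;
        if (ORTE_SUCCESS != opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT32)) {
            return;
        }
        cnt = 1;
        if (ORTE_SUCCESS != opal_dss.unpack(buffer, &jobid, &cnt, ORTE_JOBID)) {
            return;
        }
        /* present only if the spawn request carried a room number */
        cnt = 1;
        (void) opal_dss.unpack(buffer, &room, &cnt, OPAL_INT);
    }
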
 830 void orte_plm_base_registered(int fd, short args, void *cbdata)
 831 {
 832     orte_job_t *jdata;
 833     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
 834 
 835     ORTE_ACQUIRE_OBJECT(caddy);
 836 
 837     /* convenience */
 838     jdata = caddy->jdata;
 839 
 840     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 841                          "%s plm:base:launch %s registered",
 842                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 843                          ORTE_JOBID_PRINT(jdata->jobid)));
 844 
 845     if (ORTE_JOB_STATE_REGISTERED != caddy->job_state) {
 846         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 847                              "%s plm:base:launch job %s not registered - state %s",
 848                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 849                              ORTE_JOBID_PRINT(jdata->jobid),
 850                              orte_job_state_to_str(caddy->job_state)));
 851         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 852         OBJ_RELEASE(caddy);
 853         return;
 854     }
 855     /* update job state */
 856     jdata->state = caddy->job_state;
 857 
 858     /* if this wasn't a debugger job, then we need to init_after_spawn for debuggers */
 859     if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
 860         ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_READY_FOR_DEBUGGERS);
 861     }
 862 
 863     OBJ_RELEASE(caddy);
 864 }
 865 
 866 /* daemons callback when they start - need to listen for them */
 867 static bool orted_failed_launch;
 868 static orte_job_t *jdatorted=NULL;
 869 
 870 /* callback for topology reports */
 871 void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender,
 872                                    opal_buffer_t *buffer,
 873                                    orte_rml_tag_t tag, void *cbdata)
 874 {
 875     hwloc_topology_t topo;
 876     int rc, idx;
 877     char *sig, *coprocessors, **sns;
 878     orte_proc_t *daemon=NULL;
 879     orte_topology_t *t, *t2;
 880     int i;
 881     uint32_t h;
 882     orte_job_t *jdata;
 883     uint8_t flag;
 884     size_t inlen, cmplen;
 885     uint8_t *packed_data, *cmpdata;
 886     opal_buffer_t datbuf, *data;
 887 
 888     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
 889                          "%s plm:base:daemon_topology recvd for daemon %s",
 890                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 891                          ORTE_NAME_PRINT(sender)));
 892 
 893     /* get the daemon job, if necessary */
 894     if (NULL == jdatorted) {
 895         jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
 896     }
 897     if (NULL == (daemon = (orte_proc_t*)opal_pointer_array_get_item(jdatorted->procs, sender->vpid))) {
 898         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 899         orted_failed_launch = true;
 900         goto CLEANUP;
 901     }
 902     OBJ_CONSTRUCT(&datbuf, opal_buffer_t);
 903     /* unpack the flag to see if this payload is compressed */
 904     idx=1;
 905     if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flag, &idx, OPAL_INT8))) {
 906         ORTE_ERROR_LOG(rc);
 907         orted_failed_launch = true;
 908         goto CLEANUP;
 909     }
 910     if (flag) {
 911         /* unpack the data size */
 912         idx=1;
 913         if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &inlen, &idx, OPAL_SIZE))) {
 914             ORTE_ERROR_LOG(rc);
 915             orted_failed_launch = true;
 916             goto CLEANUP;
 917         }
 918         /* unpack the uncompressed data size */
 919         idx=1;
 920         if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &cmplen, &idx, OPAL_SIZE))) {
 921             ORTE_ERROR_LOG(rc);
 922             orted_failed_launch = true;
 923             goto CLEANUP;
 924         }
 925         /* allocate the space */
 926         packed_data = (uint8_t*)malloc(inlen);
 927         /* unpack the data blob */
 928         idx = inlen;
 929         if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, packed_data, &idx, OPAL_UINT8))) {
 930             ORTE_ERROR_LOG(rc);
 931             orted_failed_launch = true;
 932             goto CLEANUP;
 933         }
 934         /* decompress the data */
 935         if (opal_compress.decompress_block(&cmpdata, cmplen,
 936                                        packed_data, inlen)) {
 937             /* the data has been uncompressed */
 938             opal_dss.load(&datbuf, cmpdata, cmplen);
 939             data = &datbuf;
 940         } else {
 941             data = buffer;
 942         }
 943         free(packed_data);
 944     } else {
 945         data = buffer;
 946     }
 947 
 948     /* unpack the topology signature for this node */
 949     idx=1;
 950     if (OPAL_SUCCESS != (rc = opal_dss.unpack(data, &sig, &idx, OPAL_STRING))) {
 951         ORTE_ERROR_LOG(rc);
 952         orted_failed_launch = true;
 953         goto CLEANUP;
 954     }
 955     /* find it in the array */
 956     t = NULL;
 957     for (i=0; i < orte_node_topologies->size; i++) {
 958         if (NULL == (t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, i))) {
 959             continue;
 960         }
 961         /* just check the signature */
 962         if (0 == strcmp(sig, t2->sig)) {
 963             t = t2;
 964             break;
 965         }
 966     }
 967     if (NULL == t) {
 968         /* should never happen */
 969         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 970         orted_failed_launch = true;
 971         goto CLEANUP;
 972     }
 973 
 974     /* unpack the topology */
 975     idx=1;
 976     if (OPAL_SUCCESS != (rc = opal_dss.unpack(data, &topo, &idx, OPAL_HWLOC_TOPO))) {
 977         ORTE_ERROR_LOG(rc);
 978         orted_failed_launch = true;
 979         goto CLEANUP;
 980     }
 981     /* record the final topology */
 982     t->topo = topo;
 983 
 984     /* unpack any coprocessors */
 985     idx=1;
 986     if (OPAL_SUCCESS != (rc = opal_dss.unpack(data, &coprocessors, &idx, OPAL_STRING))) {
 987         ORTE_ERROR_LOG(rc);
 988         orted_failed_launch = true;
 989         goto CLEANUP;
 990     }
 991     if (NULL != coprocessors) {
 992         /* init the hash table, if necessary */
 993         if (NULL == orte_coprocessors) {
 994             orte_coprocessors = OBJ_NEW(opal_hash_table_t);
 995             opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs);
 996         }
 997         /* separate the serial numbers of the coprocessors
 998          * on this host
 999          */
1000         sns = opal_argv_split(coprocessors, ',');
1001         for (idx=0; NULL != sns[idx]; idx++) {
1002             /* compute the hash */
1003             OPAL_HASH_STR(sns[idx], h);
1004             /* mark that this coprocessor is hosted by this node */
1005             opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&daemon->name.vpid);
1006         }
1007         opal_argv_free(sns);
1008         free(coprocessors);
1009         orte_coprocessors_detected = true;
1010     }
1011     /* see if this daemon is on a coprocessor */
1012     idx=1;
1013     if (OPAL_SUCCESS != (rc = opal_dss.unpack(data, &coprocessors, &idx, OPAL_STRING))) {
1014         ORTE_ERROR_LOG(rc);
1015         orted_failed_launch = true;
1016         goto CLEANUP;
1017     }
1018     if (NULL != coprocessors) {
1019         if (orte_get_attribute(&daemon->node->attributes, ORTE_NODE_SERIAL_NUMBER, NULL, OPAL_STRING)) {
1020             /* this is not allowed - a coprocessor cannot be host
1021              * to another coprocessor at this time
1022              */
1023             ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
1024             orted_failed_launch = true;
1025             free(coprocessors);
1026             goto CLEANUP;
1027         }
1028         orte_set_attribute(&daemon->node->attributes, ORTE_NODE_SERIAL_NUMBER, ORTE_ATTR_LOCAL, coprocessors, OPAL_STRING);
1029         free(coprocessors);
1030         orte_coprocessors_detected = true;
1031     }
1032 
1033   CLEANUP:
1034     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1035                          "%s plm:base:orted:report_topo launch %s for daemon %s",
1036                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1037                          orted_failed_launch ? "failed" : "completed",
1038                          ORTE_NAME_PRINT(sender)));
1039 
1040     if (orted_failed_launch) {
1041         ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START);
1042         return;
1043     } else {
1044         jdatorted->num_reported++;
1045         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1046                              "%s plm:base:orted_report_launch recvd %d of %d reported daemons",
1047                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1048                              jdatorted->num_reported, jdatorted->num_procs));
1049         if (jdatorted->num_procs == jdatorted->num_reported) {
1050             bool dvm = true;
1051             uint32_t key;
1052             void *nptr;
1053             jdatorted->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
1054             /* activate the daemons_reported state for all jobs
1055              * whose daemons were launched
1056              */
1057             rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jdata, &nptr);
1058             while (OPAL_SUCCESS == rc) {
1059                 if (ORTE_PROC_MY_NAME->jobid != jdata->jobid) {
1060                     dvm = false;
1061                     if (ORTE_JOB_STATE_DAEMONS_LAUNCHED == jdata->state) {
1062                         ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
1063                     }
1064                 }
1065                 rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jdata, nptr, &nptr);
1066             }
1067             if (dvm) {
1068                 /* must be launching a DVM - activate the state */
1069                 ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_DAEMONS_REPORTED);
1070             }
1071         }
1072     }
1073 }
1074 
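The flag/size/size/blob framing unpacked at the top of this function reappears verbatim in orte_plm_base_daemon_callback() below. A sketch of that framing factored into one hypothetical helper (error paths compressed; the caller owns datbuf and must OBJ_CONSTRUCT it first):

    /* return the buffer holding the usable payload: either the original
     * buffer (uncompressed) or datbuf loaded with the inflated bytes */
    static opal_buffer_t *maybe_decompress(opal_buffer_t *buffer,
                                           opal_buffer_t *datbuf)
    {
        uint8_t flag, *packed, *cmpdata;
        size_t inlen, cmplen;
        int idx;

        idx = 1;
        if (ORTE_SUCCESS != opal_dss.unpack(buffer, &flag, &idx, OPAL_INT8)) {
            return NULL;
        }
        if (!flag) {
            return buffer;                  /* payload was not compressed */
        }
        idx = 1;
        opal_dss.unpack(buffer, &inlen, &idx, OPAL_SIZE);   /* compressed size */
        idx = 1;
        opal_dss.unpack(buffer, &cmplen, &idx, OPAL_SIZE);  /* uncompressed size */
        packed = (uint8_t*)malloc(inlen);
        idx = inlen;
        opal_dss.unpack(buffer, packed, &idx, OPAL_UINT8);  /* the blob */
        if (opal_compress.decompress_block(&cmpdata, cmplen, packed, inlen)) {
            opal_dss.load(datbuf, cmpdata, cmplen);   /* datbuf owns cmpdata */
            free(packed);
            return datbuf;
        }
        free(packed);
        return buffer;
    }
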
1075 void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
1076                                    opal_buffer_t *buffer,
1077                                    orte_rml_tag_t tag, void *cbdata)
1078 {
1079     char *ptr;
1080     int rc, idx;
1081     orte_proc_t *daemon=NULL;
1082     orte_job_t *jdata;
1083     orte_process_name_t dname;
1084     opal_buffer_t *relay;
1085     char *sig;
1086     orte_topology_t *t;
1087     hwloc_topology_t topo;
1088     int i;
1089     bool found;
1090     orte_daemon_cmd_flag_t cmd;
1091     int32_t flag;
1092     opal_value_t *kv;
1093     char *myendian;
1094 
1095     /* get the daemon job, if necessary */
1096     if (NULL == jdatorted) {
1097         jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
1098     }
1099 
1100     /* get my endianness */
1101     t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0);
1102     if (NULL == t) {
1103         /* should never happen */
1104         myendian = "unknown";
1105     } else {
1106         myendian = strrchr(t->sig, ':');
1107         ++myendian;
1108     }
1109 
1110     /* multiple daemons could be in this buffer, so unpack until we exhaust the data */
1111     idx = 1;
1112     while (OPAL_SUCCESS == (rc = opal_dss.unpack(buffer, &dname, &idx, ORTE_NAME))) {
1113         char *nodename = NULL;
1114 
1115         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1116                              "%s plm:base:orted_report_launch from daemon %s",
1117                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1118                              ORTE_NAME_PRINT(&dname)));
1119 
1120         /* update state and record for this daemon contact info */
1121         if (NULL == (daemon = (orte_proc_t*)opal_pointer_array_get_item(jdatorted->procs, dname.vpid))) {
1122             ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
1123             orted_failed_launch = true;
1124             goto CLEANUP;
1125         }
1126         daemon->state = ORTE_PROC_STATE_RUNNING;
1127         /* record that this daemon is alive */
1128         ORTE_FLAG_SET(daemon, ORTE_PROC_FLAG_ALIVE);
1129 
1130         /* unpack the flag indicating the number of connection blobs
1131          * in the report */
1132         idx = 1;
1133         if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flag, &idx, OPAL_INT32))) {
1134             ORTE_ERROR_LOG(rc);
1135             orted_failed_launch = true;
1136             goto CLEANUP;
1137         }
1138         for (i=0; i < flag; i++) {
1139             idx = 1;
1140             if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &kv, &idx, OPAL_VALUE))) {
1141                 ORTE_ERROR_LOG(rc);
1142                 orted_failed_launch = true;
1143                 goto CLEANUP;
1144             }
1145             /* store this in a daemon wireup buffer for later distribution */
1146             opal_pmix.store_local(&dname, kv);
1147             OBJ_RELEASE(kv);
1148         }
1149 
1150         /* unpack the node name */
1151         idx = 1;
1152         if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nodename, &idx, OPAL_STRING))) {
1153             ORTE_ERROR_LOG(rc);
1154             orted_failed_launch = true;
1155             goto CLEANUP;
1156         }
1157         if (!orte_have_fqdn_allocation) {
1158             /* remove any domain info */
1159             if (NULL != (ptr = strchr(nodename, '.'))) {
1160                 *ptr = '\0';
1161                 ptr = strdup(nodename);
1162                 free(nodename);
1163                 nodename = ptr;
1164             }
1165         }
1166 
1167         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1168                              "%s plm:base:orted_report_launch from daemon %s on node %s",
1169                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1170                              ORTE_NAME_PRINT(&daemon->name), nodename));
1171 
1172         /* mark the daemon as launched */
1173         ORTE_FLAG_SET(daemon->node, ORTE_NODE_FLAG_DAEMON_LAUNCHED);
1174 
1175         if (orte_retain_aliases) {
1176             char *alias, **atmp=NULL;
1177             uint8_t naliases, ni;
1178             /* first, store the nodename itself as an alias. We do
1179              * this in case the nodename isn't the same as what we
1180              * were given by the allocation. For example, a hostfile
1181              * might contain an IP address instead of the value returned
1182              * by gethostname, yet the daemon will have returned the latter
1183              * and apps may refer to the host by that name
1184              */
1185             opal_argv_append_nosize(&atmp, nodename);
1186             /* unpack and store the provided aliases */
1187             idx = 1;
1188             if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &naliases, &idx, OPAL_UINT8))) {
1189                 ORTE_ERROR_LOG(rc);
1190                 orted_failed_launch = true;
1191                 goto CLEANUP;
1192             }
1193             for (ni=0; ni < naliases; ni++) {
1194                 idx = 1;
1195                 if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &alias, &idx, OPAL_STRING))) {
1196                     ORTE_ERROR_LOG(rc);
1197                     orted_failed_launch = true;
1198                     goto CLEANUP;
1199                 }
1200                 opal_argv_append_nosize(&atmp, alias);
1201                 free(alias);
1202             }
1203             if (0 < naliases) {
1204                 alias = opal_argv_join(atmp, ',');
1205                 orte_set_attribute(&daemon->node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, alias, OPAL_STRING);
1206                 free(alias);
1207             }
1208             opal_argv_free(atmp);
1209         }
1210 
1211         /* unpack the topology signature for that node */
1212         idx=1;
1213         if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &idx, OPAL_STRING))) {
1214             ORTE_ERROR_LOG(rc);
1215             orted_failed_launch = true;
1216             goto CLEANUP;
1217         }
1218         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1219                              "%s RECEIVED TOPOLOGY SIG %s FROM NODE %s",
1220                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sig, nodename));
1221 
1222         /* the daemon with vpid 1 always sends its topology back */
1223         topo = NULL;
1224         if (1 == dname.vpid) {
1225             uint8_t flag;
1226             size_t inlen, cmplen;
1227             uint8_t *packed_data, *cmpdata;
1228             opal_buffer_t datbuf, *data;
1229             OBJ_CONSTRUCT(&datbuf, opal_buffer_t);
1230             /* unpack the flag to see if this payload is compressed */
1231             idx=1;
1232             if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flag, &idx, OPAL_INT8))) {
1233                 ORTE_ERROR_LOG(rc);
1234                 orted_failed_launch = true;
1235                 goto CLEANUP;
1236             }
1237             if (flag) {
1238                 /* unpack the data size */
1239                 idx=1;
1240                 if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &inlen, &idx, OPAL_SIZE))) {
1241                     ORTE_ERROR_LOG(rc);
1242                     orted_failed_launch = true;
1243                     goto CLEANUP;
1244                 }
1245                 /* unpack the uncompressed data size */
1246                 idx=1;
1247                 if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &cmplen, &idx, OPAL_SIZE))) {
1248                     ORTE_ERROR_LOG(rc);
1249                     orted_failed_launch = true;
1250                     goto CLEANUP;
1251                 }
1252                 /* allocate the space */
1253                 packed_data = (uint8_t*)malloc(inlen);
1254                 /* unpack the data blob */
1255                 idx = inlen;
1256                 if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, packed_data, &idx, OPAL_UINT8))) {
1257                     ORTE_ERROR_LOG(rc);
1258                     orted_failed_launch = true;
1259                     goto CLEANUP;
1260                 }
1261                 /* decompress the data */
1262                 if (opal_compress.decompress_block(&cmpdata, cmplen,
1263                                                packed_data, inlen)) {
1264                     /* the data has been uncompressed */
1265                     opal_dss.load(&datbuf, cmpdata, cmplen);
1266                     data = &datbuf;
1267                 } else {
1268                     data = buffer;
1269                 }
1270                 free(packed_data);
1271             } else {
1272                 data = buffer;
1273             }
1274             idx=1;
1275             if (OPAL_SUCCESS != (rc = opal_dss.unpack(data, &topo, &idx, OPAL_HWLOC_TOPO))) {
1276                 ORTE_ERROR_LOG(rc);
1277                 orted_failed_launch = true;
1278                 goto CLEANUP;
1279             }
1280         }
1281 
1282         /* do we already have this topology from some other node? */
1283         found = false;
1284         for (i=0; i < orte_node_topologies->size; i++) {
1285             if (NULL == (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, i))) {
1286                 continue;
1287             }
1288             /* just check the signature */
1289             if (0 == strcmp(sig, t->sig)) {
1290                 OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1291                                      "%s TOPOLOGY ALREADY RECORDED",
1292                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1293                 found = true;
1294                 daemon->node->topology = t;
1295                 if (NULL != topo) {
1296                     hwloc_topology_destroy(topo);
1297                 }
1298                 free(sig);
1299                 break;
1300             }
1301 #if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT
1302               else {
1303                 /* check if the difference is due to the endianness */
1304                 ptr = strrchr(sig, ':');
1305                 ++ptr;
1306                 if (0 != strcmp(ptr, myendian)) {
1307                     /* we don't currently handle multi-endian operations in the
1308                      * MPI support */
1309                     orte_show_help("help-plm-base", "multi-endian", true,
1310                                    nodename, ptr, myendian);
1311                     orted_failed_launch = true;
1312                     if (NULL != topo) {
1313                         hwloc_topology_destroy(topo);
1314                     }
1315                     goto CLEANUP;
1316                 }
1317             }
1318 #endif
1319         }
1320 
1321         if (!found) {
1322             /* nope - save the signature and, if we weren't given the
                  * topology, request it from that node */
1323             OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1324                                  "%s NEW TOPOLOGY - ADDING",
1325                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1326             t = OBJ_NEW(orte_topology_t);
1327             t->sig = sig;
1328             t->index = opal_pointer_array_add(orte_node_topologies, t);
1329             daemon->node->topology = t;
1330             if (NULL != topo) {
1331                 t->topo = topo;
1332             } else {
1333                 /* we were not given the topology - request it from that node */
1334                 OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1335                                      "%s REQUESTING TOPOLOGY FROM %s",
1336                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1337                                      ORTE_NAME_PRINT(&dname)));
1338                 /* construct the request */
1339                 relay = OBJ_NEW(opal_buffer_t);
1340                 cmd = ORTE_DAEMON_REPORT_TOPOLOGY_CMD;
1341                 if (OPAL_SUCCESS != (rc = opal_dss.pack(relay, &cmd, 1, ORTE_DAEMON_CMD))) {
1342                     ORTE_ERROR_LOG(rc);
1343                     OBJ_RELEASE(relay);
1344                     orted_failed_launch = true;
1345                     goto CLEANUP;
1346                 }
1347                 /* send it */
1348                 orte_rml.send_buffer_nb(&dname, relay,
1349                                         ORTE_RML_TAG_DAEMON,
1350                                         orte_rml_send_callback, NULL);
1351                 /* we will count this node as completed
1352                  * when we get the full topology back */
1353                 if (NULL != nodename) {
1354                     free(nodename);
1355                     nodename = NULL;
1356                 }
1357                 idx = 1;
1358                 continue;
1359             }
1360         }
1361 
1362       CLEANUP:
1363         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1364                              "%s plm:base:orted_report_launch %s for daemon %s at contact %s",
1365                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1366                              orted_failed_launch ? "failed" : "completed",
1367                              ORTE_NAME_PRINT(&dname),
1368                              (NULL == daemon) ? "UNKNOWN" : daemon->rml_uri));
1369 
1370         if (NULL != nodename) {
1371             free(nodename);
1372             nodename = NULL;
1373         }
1374 
1375         if (orted_failed_launch) {
1376             ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START);
1377             return;
1378         } else {
1379             jdatorted->num_reported++;
1380             OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1381                                  "%s plm:base:orted_report_launch job %s recvd %d of %d reported daemons",
1382                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1383                                  ORTE_JOBID_PRINT(jdatorted->jobid),
1384                                  jdatorted->num_reported, jdatorted->num_procs));
1385             if (jdatorted->num_procs == jdatorted->num_reported) {
1386                 bool dvm = true;
1387                 uint32_t key;
1388                 void *nptr;
1389                 jdatorted->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
1390                 /* activate the daemons_reported state for all jobs
1391                  * whose daemons were launched
1392                  */
1393                 rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jdata, &nptr);
1394                 while (OPAL_SUCCESS == rc) {
1395                     if (ORTE_PROC_MY_NAME->jobid == jdata->jobid) {
1396                         goto next;
1397                     }
1398                     dvm = false;
1399                     if (ORTE_JOB_STATE_DAEMONS_LAUNCHED == jdata->state) {
1400                         ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
1401                     }
1402                   next:
1403                     rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jdata, nptr, &nptr);
1404                 }
1405                 if (dvm) {
1406                     /* must be launching a DVM - activate the state */
1407                     ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_DAEMONS_REPORTED);
1408                 }
1409             }
1410         }
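             /* reset the unpack count before the next pass of the
              * loop unpacks the next daemon's name */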
1411         idx = 1;
1412     }
1413     if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
1414         ORTE_ERROR_LOG(rc);
1415         ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START);
1416     }
1417 }
1418 
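     /* RML callback invoked when a launcher reports that one of our
      * daemons failed to start: unpack the failed daemon's vpid and
      * exit status, record them, and activate the failed-to-start
      * state for that daemon */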
1419 void orte_plm_base_daemon_failed(int st, orte_process_name_t* sender,
1420                                  opal_buffer_t *buffer,
1421                                  orte_rml_tag_t tag, void *cbdata)
1422 {
1423     int status, rc;
1424     int32_t n;
1425     orte_vpid_t vpid;
1426     orte_proc_t *daemon=NULL;
1427 
1428     /* get the daemon job, if necessary */
1429     if (NULL == jdatorted) {
1430         jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
1431     }
1432 
1433     /* unpack the daemon that failed */
1434     n=1;
1435     if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &vpid, &n, ORTE_VPID))) {
1436         ORTE_ERROR_LOG(rc);
1437         ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
1438         goto finish;
1439     }
1440 
1441     /* unpack the exit status */
1442     n=1;
1443     if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &status, &n, OPAL_INT))) {
1444         ORTE_ERROR_LOG(rc);
1445         status = ORTE_ERROR_DEFAULT_EXIT_CODE;
1446         ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
1447     } else {
1448         ORTE_UPDATE_EXIT_STATUS(WEXITSTATUS(status));
1449     }
1450 
1451     /* find the daemon and update its state/status */
1452     if (NULL == (daemon = (orte_proc_t*)opal_pointer_array_get_item(jdatorted->procs, vpid))) {
1453         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
1454         goto finish;
1455     }
1456     daemon->state = ORTE_PROC_STATE_FAILED_TO_START;
1457     daemon->exit_code = status;
1458 
1459   finish:
1460     if (NULL == daemon) {
1461         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
1462         return;
1463     }
1464     ORTE_ACTIVATE_PROC_STATE(&daemon->name, ORTE_PROC_STATE_FAILED_TO_START);
1465 }
1466 
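     /* split the configured launch agent into its component words and
      * append them to the caller's argv, returning the index of the
      * "orted" token so the caller knows where the daemon cmd itself
      * begins. For example (illustrative only), an agent of
      * "ssh -x orted" would append three words and return 2 */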
1467 int orte_plm_base_setup_orted_cmd(int *argc, char ***argv)
1468 {
1469     int i, loc;
1470     char **tmpv;
1471 
1472     /* default the location of "orted" to 0, which covers the
1473      * common case where the cmd is the single word "orted"
1474      */
1475     loc = 0;
1476     /* split the command apart in case it is multi-word */
1477     tmpv = opal_argv_split(orte_launch_agent, ' ');
1478     for (i = 0; NULL != tmpv && NULL != tmpv[i]; ++i) {
1479         if (0 == strcmp(tmpv[i], "orted")) {
1480             loc = i;
1481         }
1482         opal_argv_append(argc, argv, tmpv[i]);
1483     }
1484     opal_argv_free(tmpv);
1485 
1486     return loc;
1487 }
1488 
1489 
1490 /* pass all options as MCA params so anything we pick up
1491  * from the environment can be checked for duplicates
1492  */
1493 int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
1494                                           char *ess,
1495                                           int *proc_vpid_index)
1496 {
1497     char *param = NULL;
1498     const char **tmp_value, **tmp_value2;
1499     int loc_id;
1500     char *tmp_force = NULL;
1501     int i, j, cnt, rc;
1502     orte_job_t *jdata;
1503     unsigned long num_procs;
1504     bool ignore;
1505 
1506     /* check for debug flags */
1507     if (orte_debug_flag) {
1508         opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1509         opal_argv_append(argc, argv, "orte_debug");
1510         opal_argv_append(argc, argv, "1");
1511     }
1512     if (orte_debug_daemons_flag) {
1513         opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1514         opal_argv_append(argc, argv, "orte_debug_daemons");
1515         opal_argv_append(argc, argv, "1");
1516     }
1517     if (orte_debug_daemons_file_flag) {
1518         opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1519         opal_argv_append(argc, argv, "orte_debug_daemons_file");
1520         opal_argv_append(argc, argv, "1");
1521     }
1522     if (orte_leave_session_attached) {
1523         opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1524         opal_argv_append(argc, argv, "orte_leave_session_attached");
1525         opal_argv_append(argc, argv, "1");
1526     }
1527 
1528     if (orted_spin_flag) {
1529         opal_argv_append(argc, argv, "--spin");
1530     }
1531 
1532     if (opal_hwloc_report_bindings) {
1533         opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1534         opal_argv_append(argc, argv, "orte_report_bindings");
1535         opal_argv_append(argc, argv, "1");
1536     }
1537 
1538     if (orte_map_stddiag_to_stderr) {
1539         opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1540         opal_argv_append(argc, argv, "orte_map_stddiag_to_stderr");
1541         opal_argv_append(argc, argv, "1");
1542     } else if (orte_map_stddiag_to_stdout) {
1544         opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1545         opal_argv_append(argc, argv, "orte_map_stddiag_to_stdout");
1546         opal_argv_append(argc, argv, "1");
1547     }
1548 
1549     /* the following is not an mca param */
1550     if (NULL != getenv("ORTE_TEST_ORTED_SUICIDE")) {
1551         opal_argv_append(argc, argv, "--test-suicide");
1552     }
1553 
1554     /* tell the orted what ESS component to use */
1555     if (NULL != ess) {
1556         opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1557         opal_argv_append(argc, argv, "ess");
1558         opal_argv_append(argc, argv, ess);
1559     }
1560 
1561     /* pass the daemon jobid */
1562     opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1563     opal_argv_append(argc, argv, "ess_base_jobid");
1564     if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&param, ORTE_PROC_MY_NAME->jobid))) {
1565         ORTE_ERROR_LOG(rc);
1566         return rc;
1567     }
1568     opal_argv_append(argc, argv, param);
1569     free(param);
1570 
1571     /* setup to pass the vpid */
1572     if (NULL != proc_vpid_index) {
1573         opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1574         opal_argv_append(argc, argv, "ess_base_vpid");
1575         *proc_vpid_index = *argc;
1576         opal_argv_append(argc, argv, "<template>");
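             /* launchers are expected to overwrite this placeholder with
              * each daemon's actual vpid just before spawning it */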
1577     }
1578 
1579     /* pass the total number of daemons that will be in the system */
1580     if (ORTE_PROC_IS_HNP) {
1581         jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
1582         num_procs = jdata->num_procs;
1583     } else {
1584         num_procs = orte_process_info.num_procs;
1585     }
1586     opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1587     opal_argv_append(argc, argv, "ess_base_num_procs");
1588     opal_asprintf(&param, "%lu", num_procs);
1589     opal_argv_append(argc, argv, param);
1590     free(param);
1591 
1592     /* pass the HNP uri */
1593     opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1594     opal_argv_append(argc, argv, "orte_hnp_uri");
1595     opal_argv_append(argc, argv, orte_process_info.my_hnp_uri);
1596 
1597     /* if --xterm was specified, pass that along */
1598     if (NULL != orte_xterm) {
1599         opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1600         opal_argv_append(argc, argv, "orte_xterm");
1601         opal_argv_append(argc, argv, orte_xterm);
1602     }
1603 
1604     loc_id = mca_base_var_find("opal", "mca", "base", "param_files");
1605     if (loc_id < 0) {
1606         rc = OPAL_ERR_NOT_FOUND;
1607         ORTE_ERROR_LOG(rc);
1608         return rc;
1609     }
1610     tmp_value = NULL;
1611     rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL);
1612     if (ORTE_SUCCESS != rc) {
1613         ORTE_ERROR_LOG(rc);
1614         return rc;
1615     }
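     /* a value of exactly "none" means the user disabled MCA param
      * files, so the Aggregate MCA Parameter Sets are skipped below */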
1616     if (NULL != tmp_value && NULL != tmp_value[0]) {
1617         rc = strcmp(tmp_value[0], "none");
1618     } else {
1619         rc = 1;
1620     }
1621 
1622     if (0 != rc) {
1623         /*
1624          * Pass along the Aggregate MCA Parameter Sets
1625          */
1626         /* Add the 'prefix' param */
1627         tmp_value = NULL;
1628 
1629         loc_id = mca_base_var_find("opal", "mca", "base", "envar_file_prefix");
1630         if (loc_id < 0) {
1631             rc = OPAL_ERR_NOT_FOUND;
1632             ORTE_ERROR_LOG(rc);
1633             return rc;
1634         }
1635         rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL);
1636         if (ORTE_SUCCESS != rc) {
1637             ORTE_ERROR_LOG(rc);
1638             return rc;
1639         }
1640         if (NULL != tmp_value && NULL != tmp_value[0]) {
1641             /* Could also use the short version '-tune'
1642              * but being verbose has some value
1643              */
1644             opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1645             opal_argv_append(argc, argv, "mca_base_envar_file_prefix");
1646             opal_argv_append(argc, argv, tmp_value[0]);
1647         }
1648 
1649         tmp_value2 = NULL;
1650         loc_id = mca_base_var_find("opal", "mca", "base", "param_file_prefix");
             if (0 <= loc_id) {
                 mca_base_var_get_value(loc_id, &tmp_value2, NULL, NULL);
             }
1652         if (NULL != tmp_value2 && NULL != tmp_value2[0]) {
1653             /* Could also use the short version '-am'
1654              * but being verbose has some value
1655              */
1656             opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1657             opal_argv_append(argc, argv, "mca_base_param_file_prefix");
1658             opal_argv_append(argc, argv, tmp_value2[0]);
1659             orte_show_help("help-plm-base.txt", "deprecated-amca", true);
1660         }
1661 
1662         if ((NULL != tmp_value && NULL != tmp_value[0])
1663             || (NULL != tmp_value2 && NULL != tmp_value2[0])) {
1664             /* Add the 'path' param */
1665             tmp_value = NULL;
1666             loc_id = mca_base_var_find("opal", "mca", "base", "param_file_path");
1667             if (loc_id < 0) {
                     rc = OPAL_ERR_NOT_FOUND;
1668                 ORTE_ERROR_LOG(rc);
1669                 return rc;
1670             }
1671             rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL);
1672             if (ORTE_SUCCESS != rc) {
1673                 ORTE_ERROR_LOG(rc);
1674                 return rc;
1675             }
1676             if (NULL != tmp_value && NULL != tmp_value[0]) {
1677                 opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1678                 opal_argv_append(argc, argv, "mca_base_param_file_path");
1679                 opal_argv_append(argc, argv, tmp_value[0]);
1680             }
1681 
1682             /* Add the 'path_force' param */
1683             opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1684             opal_argv_append(argc, argv, "mca_base_param_file_path_force");
1685 
1686             tmp_value = NULL;
1687             loc_id = mca_base_var_find("opal", "mca", "base", "param_file_path_force");
1688             if (loc_id < 0) {
1689                 rc = OPAL_ERR_NOT_FOUND;
1690                 ORTE_ERROR_LOG(rc);
1691                 return rc;
1692             }
1693             rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL);
1694             if (OPAL_SUCCESS != rc) {
1695                 ORTE_ERROR_LOG(rc);
1696                 return rc;
1697             }
1698             if (NULL == tmp_value || NULL == tmp_value[0]) {
1699                 /* Get the current working directory */
1700                 tmp_force = (char *) malloc(sizeof(char) * OPAL_PATH_MAX);
1701                 if (NULL == tmp_force || NULL == getcwd(tmp_force, OPAL_PATH_MAX)) {
1702                     free(tmp_force);
1703                     tmp_force = strdup("");
1704                 }
1705 
1706                 opal_argv_append(argc, argv, tmp_force);
1707                 free(tmp_force);
1708             } else {
1709                 opal_argv_append(argc, argv, tmp_value[0]);
1710             }
1711         }
1712     }
1713 
1714     /* pass along any cmd line MCA params provided to mpirun,
1715      * being sure to "purge" any that would cause problems
1716      * on backend nodes and ignoring all duplicates
1717      */
1718     if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
1719         cnt = opal_argv_count(orted_cmd_line);
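             /* orted_cmd_line holds the collected directives as
              * ("-mca", param, value) triplets, hence the stride of 3 */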
1720         for (i=0; i < cnt; i+=3) {
1721             /* if the specified option is more than one word, we don't
1722              * have a generic way of passing it as some environments ignore
1723              * any quotes we add, while others don't - so we ignore any
1724              * such options. In most cases, this won't be a problem as
1725              * they typically only apply to things of interest to the HNP.
1726              * Individual environments can add these back into the cmd line
1727              * as they know if it can be supported
1728              */
1729             if (NULL != strchr(orted_cmd_line[i+2], ' ')) {
1730                 continue;
1731             }
1732             /* The daemon will attempt to open the PLM on the remote
1733              * end. Only a few environments allow this, so the daemon
1734              * only opens the PLM -if- it is specifically told to do
1735              * so by giving it a specific PLM module. To ensure we avoid
1736              * confusion, do not include any directives here
1737              */
1738             if (0 == strcmp(orted_cmd_line[i+1], "plm")) {
1739                 continue;
1740             }
1741             /* check for duplicate */
1742             ignore = false;
1743             for (j=0; j < *argc; j++) {
1744                 if (0 == strcmp((*argv)[j], orted_cmd_line[i+1])) {
1745                     ignore = true;
1746                     break;
1747                 }
1748             }
1749             if (!ignore) {
1750                 /* pass it along */
1751                 opal_argv_append(argc, argv, orted_cmd_line[i]);
1752                 opal_argv_append(argc, argv, orted_cmd_line[i+1]);
1753                 opal_argv_append(argc, argv, orted_cmd_line[i+2]);
1754             }
1755         }
1756     }
1757 
1758     return ORTE_SUCCESS;
1759 }
1760 
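     /* construct the "virtual machine" of daemons that will host this job:
      * short-circuit for a fixed DVM, handle dynamic spawns and the
      * no-VM/simulation modes, otherwise assemble the list of candidate
      * nodes (unmanaged vs managed allocation), filter it against the
      * app_context specs, and create a daemon for every remaining node
      * that doesn't already have one */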
1761 int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
1762 {
1763     orte_node_t *node, *nptr;
1764     orte_proc_t *proc, *pptr;
1765     orte_job_map_t *map=NULL;
1766     int rc, i;
1767     orte_job_t *daemons;
1768     opal_list_t nodes, tnodes;
1769     opal_list_item_t *item, *next;
1770     orte_app_context_t *app;
1771     bool one_filter = false;
1772     int num_nodes;
1773     bool default_hostfile_used;
1774     char *hosts = NULL;
1775     bool singleton=false;
1776     bool multi_sim = false;
1777 
1778     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1779                          "%s plm:base:setup_vm",
1780                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1781 
1782     if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
1783         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
1784         return ORTE_ERR_NOT_FOUND;
1785     }
1786     if (NULL == daemons->map) {
1787         daemons->map = OBJ_NEW(orte_job_map_t);
1788     }
1789     map = daemons->map;
1790 
1791     /* if this job is being launched against a fixed DVM, then there is
1792      * nothing for us to do - the DVM will stand as is */
1793     if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FIXED_DVM, NULL, OPAL_BOOL)) {
1794         /* mark that the daemons have reported so we can proceed */
1795         daemons->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
1796         map->num_new_daemons = 0;
1797         return ORTE_SUCCESS;
1798     }
1799 
1800     /* if this is a dynamic spawn, then we don't make any changes to
1801      * the virtual machine unless specifically requested to do so
1802      */
1803     if (ORTE_JOBID_INVALID != jdata->originator.jobid) {
1804         if (0 == map->num_nodes) {
1805             OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1806                                  "%s plm:base:setup_vm creating map",
1807                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1808             /* this is the first time thru, so the vm is just getting
1809              * defined - create a map for it and put us in as we
1810              * are obviously already here! The ess will already
1811              * have assigned our node to us.
1812              */
1813             node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
1814             opal_pointer_array_add(map->nodes, (void*)node);
1815             ++(map->num_nodes);
1816             /* maintain accounting */
1817             OBJ_RETAIN(node);
1818             /* mark that this is from a singleton */
1819             singleton = true;
1820         }
1821         OBJ_CONSTRUCT(&nodes, opal_list_t);
1822         for (i=1; i < orte_node_pool->size; i++) {
1823             if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
1824                 continue;
1825             }
1826             /* only add in nodes marked as "added" */
1827             if (!singleton && ORTE_NODE_STATE_ADDED != node->state) {
1828                 OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
1829                                      "%s plm_base:setup_vm NODE %s WAS NOT ADDED",
1830                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
1831                 continue;
1832             }
1833             OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
1834                                  "%s plm_base:setup_vm ADDING NODE %s",
1835                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
1836             /* retain a copy for our use in case the item gets
1837              * destructed along the way
1838              */
1839             OBJ_RETAIN(node);
1840             opal_list_append(&nodes, &node->super);
1841             /* reset the state so it can be used for mapping */
1842             node->state = ORTE_NODE_STATE_UP;
1843         }
1844         map->num_new_daemons = 0;
1845         /* if we didn't get anything, then there is nothing else to
1846          * do as no other daemons are to be launched
1847          */
1848         if (0 == opal_list_get_size(&nodes)) {
1849             OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1850                                  "%s plm:base:setup_vm no new daemons required",
1851                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1852             OBJ_DESTRUCT(&nodes);
1853             /* mark that the daemons have reported so we can proceed */
1854             daemons->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
1855             ORTE_FLAG_UNSET(daemons, ORTE_JOB_FLAG_UPDATED);
1856             return ORTE_SUCCESS;
1857         }
1858         /* if we got some new nodes to launch, we need to handle it */
1859         goto process;
1860     }
1861 
1862     /* if we are not working with a virtual machine, then we
1863      * look across all jobs and ensure that the "VM" contains
1864      * all nodes with application procs on them
1865      */
1866     multi_sim = orte_get_attribute(&jdata->attributes, ORTE_JOB_MULTI_DAEMON_SIM, NULL, OPAL_BOOL);
1867     if (orte_get_attribute(&daemons->attributes, ORTE_JOB_NO_VM, NULL, OPAL_BOOL) || multi_sim) {
1868         OBJ_CONSTRUCT(&nodes, opal_list_t);
1869         /* loop across all nodes and include those that have
1870          * num_procs > 0 && no daemon already on them
1871          */
1872         for (i=1; i < orte_node_pool->size; i++) {
1873             if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
1874                 continue;
1875             }
1876             /* ignore nodes that are marked as do-not-use for this mapping */
1877             if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
1878                 OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
1879                                      "NODE %s IS MARKED NO_USE", node->name));
1880                 /* reset the state so it can be used another time */
1881                 node->state = ORTE_NODE_STATE_UP;
1882                 continue;
1883             }
1884             if (ORTE_NODE_STATE_DOWN == node->state) {
1885                 OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
1886                                      "NODE %s IS MARKED DOWN", node->name));
1887                 continue;
1888             }
1889             if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
1890                 OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
1891                                      "NODE %s IS MARKED NO_INCLUDE", node->name));
1892                 /* not to be used */
1893                 continue;
1894             }
1895             if (0 < node->num_procs || multi_sim) {
1896                 /* retain a copy for our use in case the item gets
1897                  * destructed along the way
1898                  */
1899                 OBJ_RETAIN(node);
1900                 opal_list_append(&nodes, &node->super);
1901             }
1902         }
1903         if (multi_sim) {
1904             goto process;
1905         }
1906         /* see if anybody had procs */
1907         if (0 == opal_list_get_size(&nodes)) {
1908             /* if the HNP has some procs, then we are still good */
1909             node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
1910             if (0 < node->num_procs) {
1911                 OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1912                                      "%s plm:base:setup_vm only HNP in use",
1913                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1914                 OBJ_DESTRUCT(&nodes);
1915                 map->num_nodes = 1;
1916                 /* mark that the daemons have reported so we can proceed */
1917                 daemons->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
1918                 return ORTE_SUCCESS;
1919             }
1920             /* well, if the HNP doesn't have any procs, and neither did
1921              * anyone else...then we have a big problem
1922              */
1923             ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
1924             return ORTE_ERR_FATAL;
1925         }
1926         goto process;
1927     }
1928 
1929     if (0 == map->num_nodes) {
1930         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1931                              "%s plm:base:setup_vm creating map",
1932                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1933         /* this is the first time thru, so the vm is just getting
1934          * defined - put us in as we
1935          * are obviously already here! The ess will already
1936          * have assigned our node to us.
1937          */
1938         node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
1939         opal_pointer_array_add(map->nodes, (void*)node);
1940         ++(map->num_nodes);
1941         /* maintain accounting */
1942         OBJ_RETAIN(node);
1943     }
1944 
1945     /* zero-out the number of new daemons as we will compute this
1946      * each time we are called
1947      */
1948     map->num_new_daemons = 0;
1949 
1950     /* setup the list of nodes */
1951     OBJ_CONSTRUCT(&nodes, opal_list_t);
1952 
1953     /* if this is an unmanaged allocation, then we use
1954      * the nodes that were specified for the union of
1955      * all apps - there is no need to collect all
1956      * available nodes and "filter" them
1957      */
1958     if (!orte_managed_allocation) {
1959         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1960                              "%s setup:vm: working unmanaged allocation",
1961                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1962         default_hostfile_used = false;
1963         OBJ_CONSTRUCT(&tnodes, opal_list_t);
1964         for (i=0; i < jdata->apps->size; i++) {
1965             if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
1966                 continue;
1967             }
1968             /* if the app provided a dash-host, and we are not treating
1969              * them as requested or "soft" locations, then use those nodes
1970              */
1971             hosts = NULL;
1972             if (!orte_soft_locations &&
1973                 orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) {
1974                 OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1975                                      "%s using dash_host",
1976                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1977                 if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&tnodes, hosts, false))) {
1978                     ORTE_ERROR_LOG(rc);
1979                     free(hosts);
1980                     return rc;
1981                 }
1982                 free(hosts);
1983             } else if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
1984                 /* otherwise, if the app provided a hostfile, then use that */
1985                 OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1986                                      "%s using hostfile %s",
1987                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
1988                 if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&tnodes, hosts))) {
1989                     ORTE_ERROR_LOG(rc);
1990                     free(hosts);
1991                     return rc;
1992                 }
1993                 free(hosts);
1994             } else if (NULL != orte_rankfile) {
1995                 /* use the rankfile, if provided */
1996                 OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1997                                      "%s using rankfile %s",
1998                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1999                                      orte_rankfile));
2000                 if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&tnodes,
2001                                                                        orte_rankfile))) {
2002                     ORTE_ERROR_LOG(rc);
2003                     return rc;
2004                 }
2005             } else if (NULL != orte_default_hostfile) {
2006                 if (!default_hostfile_used) {
2007                     /* fall back to the default hostfile, if provided */
2008                     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
2009                                          "%s using default hostfile %s",
2010                                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2011                                          orte_default_hostfile));
2012                     if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&tnodes,
2013                                                                            orte_default_hostfile))) {
2014                         ORTE_ERROR_LOG(rc);
2015                         return rc;
2016                     }
2017                     /* only include it once */
2018                     default_hostfile_used = true;
2019                 }
2020             }
2021         }
2022         /* cycle thru the resulting list, finding the nodes on
2023          * the node pool array while removing ourselves
2024          * and all nodes that are down or otherwise unusable
2025          */
2026         while (NULL != (item = opal_list_remove_first(&tnodes))) {
2027             nptr = (orte_node_t*)item;
2028             OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
2029                                  "%s checking node %s",
2030                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2031                                  nptr->name));
2032             for (i=0; i < orte_node_pool->size; i++) {
2033                 if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
2034                     continue;
2035                 }
2036                 if (0 != strcmp(node->name, nptr->name)) {
2037                     continue;
2038                 }
2039                 /* have a match - now see if we want this node */
2040                 /* ignore nodes that are marked as do-not-use for this mapping */
2041                 if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
2042                     OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
2043                                          "NODE %s IS MARKED NO_USE", node->name));
2044                     /* reset the state so it can be used another time */
2045                     node->state = ORTE_NODE_STATE_UP;
2046                     break;
2047                 }
2048                 if (ORTE_NODE_STATE_DOWN == node->state) {
2049                     OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
2050                                          "NODE %s IS MARKED DOWN", node->name));
2051                     break;
2052                 }
2053                 if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
2054                     OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
2055                                          "NODE %s IS MARKED NO_INCLUDE", node->name));
2056                     break;
2057                 }
2058                 /* if this node is us, ignore it */
2059                 if (0 == node->index) {
2060                     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
2061                                          "%s ignoring myself",
2062                                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
2063                     break;
2064                 }
2065                 /* we want it - add it to list */
2066                 OBJ_RETAIN(node);
2067                 opal_list_append(&nodes, &node->super);
2068             }
2069             OBJ_RELEASE(nptr);
2070         }
2071         OPAL_LIST_DESTRUCT(&tnodes);
2072         /* if we didn't get anything, then we are the only node in the
2073          * allocation - so there is nothing else to do as no other
2074          * daemons are to be launched
2075          */
2076         if (0 == opal_list_get_size(&nodes)) {
2077             OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
2078                                  "%s plm:base:setup_vm only HNP in allocation",
2079                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
2080             OBJ_DESTRUCT(&nodes);
2081             /* mark that the daemons have reported so we can proceed */
2082             daemons->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
2083             ORTE_FLAG_UNSET(daemons, ORTE_JOB_FLAG_UPDATED);
2084             return ORTE_SUCCESS;
2085         }
2086         /* continue processing */
2087         goto process;
2088     }
2089 
2090     /* construct a list of available nodes */
2091     for (i=1; i < orte_node_pool->size; i++) {
2092         if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
2093             /* ignore nodes that are marked as do-not-use for this mapping */
2094             if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
2095                 OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
2096                                      "NODE %s IS MARKED NO_USE", node->name));
2097                 /* reset the state so it can be used another time */
2098                 node->state = ORTE_NODE_STATE_UP;
2099                 continue;
2100             }
2101             if (ORTE_NODE_STATE_DOWN == node->state) {
2102                 OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
2103                                      "NODE %s IS MARKED DOWN", node->name));
2104                 continue;
2105             }
2106             if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
2107                 OPAL_OUTPUT_VERBOSE((10, orte_plm_base_framework.framework_output,
2108                                      "NODE %s IS MARKED NO_INCLUDE", node->name));
2109                 /* not to be used */
2110                 continue;
2111             }
2112             /* retain a copy for our use in case the item gets
2113              * destructed along the way
2114              */
2115             OBJ_RETAIN(node);
2116             opal_list_append(&nodes, &node->super);
2117             /* clear the mapped flag - the filtering logic below
2118              * will set it only on the nodes it decides to keep
2119              */
2120             ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
2121         }
2122     }
2123 
2124     /* if we didn't get anything, then we are the only node in the
2125      * system - so there is nothing else to do as no other
2126      * daemons are to be launched
2127      */
2128     if (0 == opal_list_get_size(&nodes)) {
2129         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
2130                              "%s plm:base:setup_vm only HNP in allocation",
2131                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
2132         /* cleanup */
2133         OBJ_DESTRUCT(&nodes);
2134         /* mark that the daemons have reported so we can proceed */
2135         daemons->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
2136         ORTE_FLAG_UNSET(daemons, ORTE_JOB_FLAG_UPDATED);
2137         return ORTE_SUCCESS;
2138     }
2139 
2140     /* filter across the union of all app_context specs - if the HNP
2141      * was allocated, then we have to include
2142      * ourselves in case someone has specified a -host or hostfile
2143      * that includes the head node. We will remove ourselves later
2144      * as we clearly already exist
2145      */
2146     if (orte_hnp_is_allocated) {
2147         node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
2148         OBJ_RETAIN(node);
2149         opal_list_prepend(&nodes, &node->super);
2150     }
2151     for (i=0; i < jdata->apps->size; i++) {
2152         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
2153             continue;
2154         }
2155         if (ORTE_SUCCESS != (rc = orte_rmaps_base_filter_nodes(app, &nodes, false)) &&
2156             rc != ORTE_ERR_TAKE_NEXT_OPTION) {
2157             ORTE_ERROR_LOG(rc);
2158             return rc;
2159         }
2160         if (ORTE_SUCCESS == rc) {
2161             /* we filtered something */
2162             one_filter = true;
2163         }
2164     }
2165 
2166     if (one_filter) {
2167         /* at least one filtering option was executed, so
2168          * remove all nodes that were not mapped
2169          */
2170         item = opal_list_get_first(&nodes);
2171         while (item != opal_list_get_end(&nodes)) {
2172             next = opal_list_get_next(item);
2173             node = (orte_node_t*)item;
2174             if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
2175                 opal_list_remove_item(&nodes, item);
2176                 OBJ_RELEASE(item);
2177             } else {
2178                 /* The filtering logic sets this flag only for nodes which
2179                  * are kept after filtering. This flag will be subsequently
2180                  * used in rmaps components and must be reset here */
2181                 ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
2182             }
2183             item = next;
2184         }
2185     }
2186 
2187     /* ensure we are not on the list */
2188     if (0 < opal_list_get_size(&nodes)) {
2189         item = opal_list_get_first(&nodes);
2190         node = (orte_node_t*)item;
2191         if (0 == node->index) {
2192             opal_list_remove_item(&nodes, item);
2193             OBJ_RELEASE(item);
2194         }
2195     }
2196 
2197     /* if we didn't get anything, then we are the only node in the
2198      * allocation - so there is nothing else to do as no other
2199      * daemons are to be launched
2200      */
2201     if (0 == opal_list_get_size(&nodes)) {
2202         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
2203                              "%s plm:base:setup_vm only HNP left",
2204                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
2205         OBJ_DESTRUCT(&nodes);
2206         /* mark that the daemons have reported so we can proceed */
2207         daemons->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
2208         ORTE_FLAG_UNSET(daemons, ORTE_JOB_FLAG_UPDATED);
2209         return ORTE_SUCCESS;
2210     }
2211 
2212  process:
2213     /* cycle thru all available nodes and find those that do not already
2214      * have a daemon on them - no need to include our own as we are
2215      * obviously already here! If a max vm size was given, then limit
2216      * the overall number of active nodes to the given number. Only
2217      * count the HNP's node if it was included in the allocation
2218      */
2219     if (orte_hnp_is_allocated) {
2220         num_nodes = 1;
2221     } else {
2222         num_nodes = 0;
2223     }
2224     while (NULL != (item = opal_list_remove_first(&nodes))) {
2225         /* if a max size was given and we are there, then exit the loop */
2226         if (0 < orte_max_vm_size && num_nodes == orte_max_vm_size) {
2227             /* maintain accounting */
2228             OBJ_RELEASE(item);
2229             break;
2230         }
2231         node = (orte_node_t*)item;
2232         /* if this node is already in the map, skip it */
2233         if (NULL != node->daemon) {
2234             num_nodes++;
2235             /* maintain accounting */
2236             OBJ_RELEASE(item);
2237             continue;
2238         }
2239         /* add the node to the map - we retained it
2240          * when adding it to the list, so we don't need
2241          * to retain it again
2242          */
2243         opal_pointer_array_add(map->nodes, (void*)node);
2244         ++(map->num_nodes);
2245         num_nodes++;
2246         /* create a new daemon object for this node */
2247         proc = OBJ_NEW(orte_proc_t);
2248         if (NULL == proc) {
2249             ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
2250             return ORTE_ERR_OUT_OF_RESOURCE;
2251         }
2252         proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
2253         if (ORTE_VPID_MAX-1 <= daemons->num_procs) {
2254             /* no more daemons available */
2255             orte_show_help("help-orte-rmaps-base.txt", "out-of-vpids", true);
2256             OBJ_RELEASE(proc);
2257             return ORTE_ERR_OUT_OF_RESOURCE;
2258         }
2259         proc->name.vpid = daemons->num_procs;  /* take the next available vpid */
2260         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
2261                              "%s plm:base:setup_vm add new daemon %s",
2262                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2263                              ORTE_NAME_PRINT(&proc->name)));
2264         /* add the daemon to the daemon job object */
2265         if (0 > (rc = opal_pointer_array_set_item(daemons->procs, proc->name.vpid, (void*)proc))) {
2266             ORTE_ERROR_LOG(rc);
2267             return rc;
2268         }
2269         ++daemons->num_procs;
2270         OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
2271                              "%s plm:base:setup_vm assigning new daemon %s to node %s",
2272                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2273                              ORTE_NAME_PRINT(&proc->name),
2274                              node->name));
2275         /* point the node to the daemon */
2276         node->daemon = proc;
2277         OBJ_RETAIN(proc);  /* maintain accounting */
2278         /* point the proc to the node and maintain accounting */
2279         proc->node = node;
2280         OBJ_RETAIN(node);
2281         if (orte_plm_globals.daemon_nodes_assigned_at_launch) {
2282             ORTE_FLAG_SET(node, ORTE_NODE_FLAG_LOC_VERIFIED);
2283         } else {
2284             ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_LOC_VERIFIED);
2285         }
2286         /* track number of daemons to be launched */
2287         ++map->num_new_daemons;
2288         /* and their starting vpid */
2289         if (ORTE_VPID_INVALID == map->daemon_vpid_start) {
2290             map->daemon_vpid_start = proc->name.vpid;
2291         }
2292         /* loop across all app procs on this node and update their parent */
2293         for (i=0; i < node->procs->size; i++) {
2294             if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
2295                 pptr->parent = proc->name.vpid;
2296             }
2297         }
2298     }
2299 
2300     if (orte_process_info.num_procs != daemons->num_procs) {
2301         /* more daemons are being launched - update the routing tree to
2302          * ensure that the HNP knows how to route messages via
2303          * the daemon routing tree - this needs to be done
2304          * here to avoid potential race conditions where the HNP
2305          * hasn't unpacked its launch message prior to being
2306          * asked to communicate.
2307          */
2308         orte_process_info.num_procs = daemons->num_procs;
2309 
2310         if (orte_process_info.max_procs < orte_process_info.num_procs) {
2311             orte_process_info.max_procs = orte_process_info.num_procs;
2312         }
2313 
2314         /* ensure all routing plans are up-to-date - we need this
2315          * so we know how to tree-spawn and/or xcast info */
2316         orte_routed.update_routing_plan();
2317     }
2318 
2319     /* mark that the daemon job changed */
2320     ORTE_FLAG_SET(daemons, ORTE_JOB_FLAG_UPDATED);
2321 
2322     /* if new daemons are being launched, mark that this job
2323      * caused it to happen */
2324     if (0 < map->num_new_daemons) {
2325         if (ORTE_SUCCESS != (rc = orte_set_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS,
2326                                                      true, NULL, OPAL_BOOL))) {
2327             ORTE_ERROR_LOG(rc);
2328             return rc;
2329         }
2330     }
2331 
2332     return ORTE_SUCCESS;
2333 }
