root/orte/mca/odls/base/odls_base_default_fns.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. setup_cbfunc
  2. orte_odls_base_default_get_add_procs_data
  3. ls_cbunc
  4. orte_odls_base_default_construct_child_list
  5. setup_path
  6. timer_cb
  7. compute_num_procs_alive
  8. orte_odls_base_spawn_proc
  9. orte_odls_base_default_launch_local
  10. orte_odls_base_default_signal_local_procs
  11. orte_odls_base_default_wait_local_proc
  12. qcdcon
  13. qcddes
  14. orte_odls_base_default_kill_local_procs
  15. orte_odls_base_get_proc_stats
  16. orte_odls_base_default_restart_proc

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2007-2011 Oracle and/or its affiliates.  All rights reserved.
  13  * Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
  14  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
  15  *                         All rights reserved.
  16  * Copyright (c) 2011-2018 Cisco Systems, Inc.  All rights reserved
  17  * Copyright (c) 2013-2019 Intel, Inc.  All rights reserved.
  18  * Copyright (c) 2014-2018 Research Organization for Information Science
  19  *                         and Technology (RIST).  All rights reserved.
  20  * Copyright (c) 2017      Mellanox Technologies Ltd. All rights reserved.
  21  * Copyright (c) 2017      IBM Corporation. All rights reserved.
  22  * $COPYRIGHT$
  23  *
  24  * Additional copyrights may follow
  25  *
  26  * $HEADER$
  27  */
  28 
  29 
  30 #include "orte_config.h"
  31 #include "orte/constants.h"
  32 #include "orte/types.h"
  33 
  34 #ifdef HAVE_SYS_WAIT_H
  35 #include <sys/wait.h>
  36 #endif
  37 #include <errno.h>
  38 #ifdef HAVE_SYS_STAT_H
  39 #include <sys/stat.h>
  40 #endif  /* HAVE_SYS_STAT_H */
  41 #ifdef HAVE_SYS_PARAM_H
  42 #include <sys/param.h>
  43 #endif
  44 #include <time.h>
  45 
  46 #include <signal.h>
  47 
  48 #include "opal_stdint.h"
  49 #include "opal/util/opal_environ.h"
  50 #include "opal/util/argv.h"
  51 #include "opal/util/os_dirpath.h"
  52 #include "opal/util/os_path.h"
  53 #include "opal/util/path.h"
  54 #include "opal/util/printf.h"
  55 #include "opal/util/sys_limits.h"
  56 #include "opal/dss/dss.h"
  57 #include "opal/mca/hwloc/hwloc-internal.h"
  58 #include "opal/mca/shmem/base/base.h"
  59 #include "opal/mca/pstat/pstat.h"
  60 #include "opal/mca/pmix/base/base.h"
  61 
  62 #include "orte/mca/errmgr/errmgr.h"
  63 #include "orte/mca/rml/rml.h"
  64 #include "orte/mca/routed/routed.h"
  65 #include "orte/mca/iof/iof.h"
  66 #include "orte/mca/iof/base/iof_base_setup.h"
  67 #include "orte/mca/ess/base/base.h"
  68 #include "orte/mca/grpcomm/base/base.h"
  69 #include "orte/mca/plm/base/base.h"
  70 #include "orte/mca/rml/base/rml_contact.h"
  71 #include "orte/mca/rmaps/rmaps_types.h"
  72 #include "orte/mca/rmaps/base/base.h"
  73 #include "orte/mca/rmaps/base/rmaps_private.h"
  74 #include "orte/mca/rtc/rtc.h"
  75 #include "orte/mca/schizo/schizo.h"
  76 #include "orte/mca/state/state.h"
  77 #include "orte/mca/filem/filem.h"
  78 
  79 #include "orte/util/context_fns.h"
  80 #include "orte/util/name_fns.h"
  81 #include "orte/util/nidmap.h"
  82 #include "orte/util/session_dir.h"
  83 #include "orte/util/proc_info.h"
  84 #include "orte/util/show_help.h"
  85 #include "orte/util/threads.h"
  86 #include "orte/runtime/orte_globals.h"
  87 #include "orte/runtime/orte_wait.h"
  88 #include "orte/orted/orted.h"
  89 #include "orte/orted/pmix/pmix_server.h"
  90 
  91 #if OPAL_ENABLE_FT_CR == 1
  92 #include "orte/mca/snapc/snapc.h"
  93 #include "orte/mca/snapc/base/base.h"
  94 #include "orte/mca/sstore/sstore.h"
  95 #include "orte/mca/sstore/base/base.h"
  96 #include "opal/mca/crs/crs.h"
  97 #include "opal/mca/crs/base/base.h"
  98 #endif
  99 
 100 #include "orte/mca/odls/base/base.h"
 101 #include "orte/mca/odls/base/odls_private.h"
 102 
 103 static void setup_cbfunc(int status,
 104                          opal_list_t *info,
 105                          void *provided_cbdata,
 106                          opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
 107 {
 108     orte_job_t *jdata = (orte_job_t*)provided_cbdata;
 109     opal_value_t *kv;
 110     opal_buffer_t cache, *bptr;
 111     int rc = ORTE_SUCCESS;
 112 
 113     OBJ_CONSTRUCT(&cache, opal_buffer_t);
 114     if (NULL != info) {
 115         /* cycle across the provided info */
 116         OPAL_LIST_FOREACH(kv, info, opal_value_t) {
 117             if (OPAL_SUCCESS != (rc = opal_dss.pack(&cache, &kv, 1, OPAL_VALUE))) {
 118                 ORTE_ERROR_LOG(rc);
 119             }
 120         }
 121     }
 122     /* add the results */
 123     bptr = &cache;
 124     opal_dss.pack(&jdata->launch_msg, &bptr, 1, OPAL_BUFFER);
 125     OBJ_DESTRUCT(&cache);
 126 
 127     /* release our caller */
 128     if (NULL != cbfunc) {
 129         cbfunc(rc, cbdata);
 130     }
 131 
 132     /* move to next stage */
 133     ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SEND_LAUNCH_MSG);
 134 
 135 }
 136 /* IT IS CRITICAL THAT ANY CHANGE IN THE ORDER OF THE INFO PACKED IN
 137  * THIS FUNCTION BE REFLECTED IN THE CONSTRUCT_CHILD_LIST PARSER BELOW
 138 */
 139 int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
 140                                               orte_jobid_t job)
 141 {
 142     int rc, v;
 143     orte_job_t *jdata=NULL, *jptr;
 144     orte_job_map_t *map=NULL;
 145     opal_buffer_t *wireup, jobdata, priorjob;
 146     opal_byte_object_t bo, *boptr;
 147     int32_t numbytes;
 148     int8_t flag;
 149     void *nptr;
 150     uint32_t key;
 151     orte_proc_t *dmn, *proc;
 152     opal_value_t *val = NULL, *kv;
 153     opal_list_t *modex, ilist;
 154     int n;
 155 
 156     /* get the job data pointer */
 157     if (NULL == (jdata = orte_get_job_data_object(job))) {
 158         ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
 159         return ORTE_ERR_BAD_PARAM;
 160     }
 161 
 162     /* get a pointer to the job map */
 163     map = jdata->map;
 164     /* if there is no map, just return */
 165     if (NULL == map) {
 166         return ORTE_SUCCESS;
 167     }
 168 
 169     /* provide the nidmap - i.e., the map of hostnames
 170      * and the vpid of the daemon running on each node.
 171      * In a DVM, we should only have to do this once */
 172     if (1 < orte_process_info.num_procs &&
 173         (!orte_node_info_communicated ||
 174          orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL))) {
 175         /* mark that we did include this info */
 176         flag = 1;
 177         opal_dss.pack(buffer, &flag, 1, OPAL_INT8);
 178         /* load the nidmap */
 179         if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, buffer))) {
 180             ORTE_ERROR_LOG(rc);
 181             return rc;
 182         }
 183 
 184         /* get wireup info for daemons */
 185         if (NULL == (jptr = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
 186             ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
 187             return ORTE_ERR_BAD_PARAM;
 188         }
 189         wireup = OBJ_NEW(opal_buffer_t);
 190         /* always include data for mpirun as the daemons can't have it yet */
 191         val = NULL;
 192         if (opal_pmix.legacy_get()) {
 193             if (OPAL_SUCCESS != (rc = opal_pmix.get(ORTE_PROC_MY_NAME, OPAL_PMIX_PROC_URI, NULL, &val)) || NULL == val) {
 194                 ORTE_ERROR_LOG(rc);
 195                 OBJ_RELEASE(wireup);
 196                 return rc;
 197             } else {
 198                 /* pack the name of the daemon */
 199                 if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
 200                     ORTE_ERROR_LOG(rc);
 201                     OBJ_RELEASE(wireup);
 202                     return rc;
 203                 }
 204                 /* pack the URI */
 205                if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &val->data.string, 1, OPAL_STRING))) {
 206                     ORTE_ERROR_LOG(rc);
 207                     OBJ_RELEASE(wireup);
 208                     return rc;
 209                 }
 210                 OBJ_RELEASE(val);
 211             }
 212         } else {
 213             if (OPAL_SUCCESS != (rc = opal_pmix.get(ORTE_PROC_MY_NAME, NULL, NULL, &val)) || NULL == val) {
 214                 ORTE_ERROR_LOG(rc);
 215                 OBJ_RELEASE(wireup);
 216                 return rc;
 217             }
 218             /* the data is returned as a list of key-value pairs in the opal_value_t */
 219             if (OPAL_PTR != val->type) {
 220                 ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 221                 OBJ_RELEASE(wireup);
 222                 return ORTE_ERR_NOT_FOUND;
 223             }
 224             if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
 225                 ORTE_ERROR_LOG(rc);
 226                 OBJ_RELEASE(wireup);
 227                 return rc;
 228             }
 229             modex = (opal_list_t*)val->data.ptr;
 230             numbytes = (int32_t)opal_list_get_size(modex);
 231             if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) {
 232                 ORTE_ERROR_LOG(rc);
 233                 OBJ_RELEASE(wireup);
 234                 return rc;
 235             }
 236             OPAL_LIST_FOREACH(kv, modex, opal_value_t) {
 237                 if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) {
 238                     ORTE_ERROR_LOG(rc);
 239                     OBJ_RELEASE(wireup);
 240                     return rc;
 241                 }
 242             }
 243             OPAL_LIST_RELEASE(modex);
 244             OBJ_RELEASE(val);
 245         }
 246         /* provide a complete map of connection info */
 247         for (v=1; v < jptr->procs->size; v++) {
 248             if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, v))) {
 249                 continue;
 250             }
 251             val = NULL;
 252             if (opal_pmix.legacy_get()) {
 253                 if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, OPAL_PMIX_PROC_URI, NULL, &val)) || NULL == val) {
 254                     ORTE_ERROR_LOG(rc);
 255                     OBJ_RELEASE(buffer);
 256                     OBJ_RELEASE(wireup);
 257                     return rc;
 258                 } else {
 259                     /* pack the name of the daemon */
 260                     if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) {
 261                         ORTE_ERROR_LOG(rc);
 262                         OBJ_RELEASE(buffer);
 263                         OBJ_RELEASE(wireup);
 264                         return rc;
 265                     }
 266                     /* pack the URI */
 267                    if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &val->data.string, 1, OPAL_STRING))) {
 268                         ORTE_ERROR_LOG(rc);
 269                         OBJ_RELEASE(buffer);
 270                         OBJ_RELEASE(wireup);
 271                         return rc;
 272                     }
 273                     OBJ_RELEASE(val);
 274                 }
 275             } else {
 276                 if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, NULL, NULL, &val)) || NULL == val) {
 277                     ORTE_ERROR_LOG(rc);
 278                     OBJ_RELEASE(buffer);
 279                     return rc;
 280                 } else {
 281                     /* the data is returned as a list of key-value pairs in the opal_value_t */
 282                     if (OPAL_PTR != val->type) {
 283                         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 284                         OBJ_RELEASE(buffer);
 285                         return ORTE_ERR_NOT_FOUND;
 286                     }
 287                     if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) {
 288                         ORTE_ERROR_LOG(rc);
 289                         OBJ_RELEASE(buffer);
 290                         OBJ_RELEASE(wireup);
 291                         return rc;
 292                     }
 293                     modex = (opal_list_t*)val->data.ptr;
 294                     numbytes = (int32_t)opal_list_get_size(modex);
 295                     if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) {
 296                         ORTE_ERROR_LOG(rc);
 297                         OBJ_RELEASE(buffer);
 298                         OBJ_RELEASE(wireup);
 299                         return rc;
 300                     }
 301                     OPAL_LIST_FOREACH(kv, modex, opal_value_t) {
 302                         if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) {
 303                             ORTE_ERROR_LOG(rc);
 304                             OBJ_RELEASE(buffer);
 305                             OBJ_RELEASE(wireup);
 306                             return rc;
 307                         }
 308                     }
 309                     OPAL_LIST_RELEASE(modex);
 310                     OBJ_RELEASE(val);
 311                 }
 312             }
 313         }
 314         /* put it in a byte object for xmission */
 315         opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes);
 316         OBJ_RELEASE(wireup);
 317         /* pack the byte object - zero-byte objects are fine */
 318         bo.size = numbytes;
 319         boptr = &bo;
 320         if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT))) {
 321             ORTE_ERROR_LOG(rc);
 322             return rc;
 323         }
 324         /* release the data since it has now been copied into our buffer */
 325         if (NULL != bo.bytes) {
 326             free(bo.bytes);
 327         }
 328 
 329         /* we need to ensure that any new daemons get a complete
 330          * copy of all active jobs so the grpcomm collectives can
 331          * properly work should a proc from one of the other jobs
 332          * interact with this one */
 333         if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL)) {
 334             flag = 1;
 335             opal_dss.pack(buffer, &flag, 1, OPAL_INT8);
 336             OBJ_CONSTRUCT(&jobdata, opal_buffer_t);
 337             rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jptr, &nptr);
 338             while (OPAL_SUCCESS == rc) {
 339                 /* skip the one we are launching now */
 340                 if (NULL != jptr && jptr != jdata &&
 341                     ORTE_PROC_MY_NAME->jobid != jptr->jobid) {
 342                     OBJ_CONSTRUCT(&priorjob, opal_buffer_t);
 343                     /* pack the job struct */
 344                     if (ORTE_SUCCESS != (rc = opal_dss.pack(&priorjob, &jptr, 1, ORTE_JOB))) {
 345                         ORTE_ERROR_LOG(rc);
 346                         OBJ_DESTRUCT(&jobdata);
 347                         OBJ_DESTRUCT(&priorjob);
 348                         return rc;
 349                     }
 350                     /* pack the location of each proc */
 351                     for (n=0; n < jptr->procs->size; n++) {
 352                         if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, n))) {
 353                             continue;
 354                         }
 355                         if (ORTE_SUCCESS != (rc = opal_dss.pack(&priorjob, &proc->parent, 1, ORTE_VPID))) {
 356                             ORTE_ERROR_LOG(rc);
 357                             OBJ_DESTRUCT(&jobdata);
 358                             OBJ_DESTRUCT(&priorjob);
 359                             return rc;
 360                         }
 361                     }
 362                     /* pack the jobdata buffer */
 363                     wireup = &priorjob;
 364                     if (ORTE_SUCCESS != (rc = opal_dss.pack(&jobdata, &wireup, 1, OPAL_BUFFER))) {
 365                         ORTE_ERROR_LOG(rc);
 366                         OBJ_DESTRUCT(&jobdata);
 367                         OBJ_DESTRUCT(&priorjob);
 368                         return rc;
 369                     }
 370                     OBJ_DESTRUCT(&priorjob);
 371                 }
 372                 rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jptr, nptr, &nptr);
 373             }
 374             /* pack the jobdata buffer */
 375             wireup = &jobdata;
 376             if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &wireup, 1, OPAL_BUFFER))) {
 377                 ORTE_ERROR_LOG(rc);
 378                 OBJ_DESTRUCT(&jobdata);
 379                 return rc;
 380             }
 381             OBJ_DESTRUCT(&jobdata);
 382         } else {
 383             flag = 0;
 384             opal_dss.pack(buffer, &flag, 1, OPAL_INT8);
 385         }
 386         orte_node_info_communicated = true;
 387     } else {
 388         /* mark that we didn't */
 389         flag = 0;
 390         opal_dss.pack(buffer, &flag, 1, OPAL_INT8);
 391         /* and that we didn't launch daemons */
 392         flag = 0;
 393         opal_dss.pack(buffer, &flag, 1, OPAL_INT8);
 394     }
 395 
 396     /* pack the job struct */
 397     if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &jdata, 1, ORTE_JOB))) {
 398         ORTE_ERROR_LOG(rc);
 399         return rc;
 400     }
 401 
 402     if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
 403         /* compute and pack the ppn */
 404         if (ORTE_SUCCESS != (rc = orte_util_generate_ppn(jdata, buffer))) {
 405             ORTE_ERROR_LOG(rc);
 406             return rc;
 407         }
 408     }
 409 
 410     /* get any application prep info */
 411     if (NULL != opal_pmix.server_setup_application) {
 412         OBJ_CONSTRUCT(&ilist, opal_list_t);
 413         /* request to allocate network resources */
 414         kv = OBJ_NEW(opal_value_t);
 415         kv->key = strdup(OPAL_PMIX_ALLOC_NETWORK_ID);
 416         kv->type = OPAL_STRING;
 417         opal_asprintf(&kv->data.string, "%s.net", ORTE_JOBID_PRINT(jdata->jobid));
 418         opal_list_append(&ilist, &kv->super);
 419         /* ask for security keys */
 420         kv = OBJ_NEW(opal_value_t);
 421         kv->key = strdup(OPAL_PMIX_ALLOC_NETWORK_SEC_KEY);
 422         kv->type = OPAL_BOOL;
 423         kv->data.flag = true;
 424         opal_list_append(&ilist, &kv->super);
 425         /* ask for envars to be forwarded */
 426         kv = OBJ_NEW(opal_value_t);
 427         kv->key = strdup(OPAL_PMIX_SETUP_APP_ENVARS);
 428         kv->type = OPAL_BOOL;
 429         kv->data.flag = true;
 430         opal_list_append(&ilist, &kv->super);
 431         /* we don't want to block here because it could
 432          * take some indeterminate time to get the info */
 433         if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_application(jdata->jobid, &ilist, setup_cbfunc, jdata))) {
 434             ORTE_ERROR_LOG(rc);
 435         }
 436         OPAL_LIST_DESTRUCT(&ilist);
 437         return rc;
 438     }
 439 
 440     /* move to next stage */
 441     ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SEND_LAUNCH_MSG);
 442 
 443     return ORTE_SUCCESS;
 444 }
 445 
 446 static void ls_cbunc(int status, void *cbdata)
 447 {
 448     opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
 449     OPAL_PMIX_WAKEUP_THREAD(lock);
 450 }
 451 
 452 int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
 453                                                 orte_jobid_t *job)
 454 {
 455     int rc;
 456     orte_std_cntr_t cnt;
 457     orte_job_t *jdata=NULL, *daemons;
 458     orte_node_t *node;
 459     orte_vpid_t dmnvpid, v;
 460     int32_t n;
 461     opal_buffer_t *bptr, *jptr;
 462     orte_proc_t *pptr, *dmn;
 463     orte_app_context_t *app;
 464     int8_t flag;
 465     opal_value_t *kv;
 466     opal_list_t local_support, cache;
 467     opal_pmix_lock_t lock;
 468 
 469     OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
 470                          "%s odls:constructing child list",
 471                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 472 
 473     /* set a default response */
 474     *job = ORTE_JOBID_INVALID;
 475     /* get the daemon job object */
 476     daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
 477     OPAL_PMIX_CONSTRUCT_LOCK(&lock);
 478     OBJ_CONSTRUCT(&local_support, opal_list_t);
 479 
 480     /* unpack the flag to see if new daemons were launched */
 481     cnt=1;
 482     if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flag, &cnt, OPAL_INT8))) {
 483         ORTE_ERROR_LOG(rc);
 484         goto REPORT_ERROR;
 485     }
 486 
 487     if (0 != flag) {
 488         /* unpack the buffer containing the info */
 489         cnt=1;
 490         if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER))) {
 491             *job = ORTE_JOBID_INVALID;
 492             ORTE_ERROR_LOG(rc);
 493             OBJ_RELEASE(bptr);
 494             goto REPORT_ERROR;
 495         }
 496         cnt=1;
 497         while (ORTE_SUCCESS == (rc = opal_dss.unpack(bptr, &jptr, &cnt, OPAL_BUFFER))) {
 498             /* unpack each job and add it to the local orte_job_data array */
 499             cnt=1;
 500             if (ORTE_SUCCESS != (rc = opal_dss.unpack(jptr, &jdata, &cnt, ORTE_JOB))) {
 501                 *job = ORTE_JOBID_INVALID;
 502                 ORTE_ERROR_LOG(rc);
 503                 OBJ_RELEASE(bptr);
 504                 OBJ_RELEASE(jptr);
 505                 goto REPORT_ERROR;
 506             }
 507             /* check to see if we already have this one */
 508             if (NULL == orte_get_job_data_object(jdata->jobid)) {
 509                 /* nope - add it */
 510                 opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
 511             } else {
 512                 /* yep - so we can drop this copy */
 513                 jdata->jobid = ORTE_JOBID_INVALID;
 514                 OBJ_RELEASE(jdata);
 515                 OBJ_RELEASE(jptr);
 516                 cnt=1;
 517                 continue;
 518             }
 519             /* unpack the location of each proc in this job */
 520             for (v=0; v < jdata->num_procs; v++) {
 521                 if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, v))) {
 522                     pptr = OBJ_NEW(orte_proc_t);
 523                     pptr->name.jobid = jdata->jobid;
 524                     pptr->name.vpid = v;
 525                     opal_pointer_array_set_item(jdata->procs, v, pptr);
 526                 }
 527                 cnt=1;
 528                 if (ORTE_SUCCESS != (rc = opal_dss.unpack(jptr, &dmnvpid, &cnt, ORTE_VPID))) {
 529                     ORTE_ERROR_LOG(rc);
 530                     OBJ_RELEASE(jptr);
 531                     OBJ_RELEASE(bptr);
 532                     goto REPORT_ERROR;
 533                 }
 534                 /* lookup the daemon */
 535                 if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, dmnvpid))) {
 536                     ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 537                     rc = ORTE_ERR_NOT_FOUND;
 538                     OBJ_RELEASE(jptr);
 539                     OBJ_RELEASE(bptr);
 540                     goto REPORT_ERROR;
 541                 }
 542                 /* connect the two */
 543                 OBJ_RETAIN(dmn->node);
 544                 pptr->node = dmn->node;
 545             }
 546             /* release the buffer */
 547             OBJ_RELEASE(jptr);
 548             cnt = 1;
 549         }
 550         OBJ_RELEASE(bptr);
 551     }
 552 
 553     /* unpack the job we are to launch */
 554     cnt=1;
 555     if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &jdata, &cnt, ORTE_JOB))) {
 556         *job = ORTE_JOBID_INVALID;
 557         ORTE_ERROR_LOG(rc);
 558         goto REPORT_ERROR;
 559     }
 560     if (ORTE_JOBID_INVALID == jdata->jobid) {
 561         ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
 562         rc = ORTE_ERR_BAD_PARAM;
 563         goto REPORT_ERROR;
 564     }
 565     *job = jdata->jobid;
 566 
 567     OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
 568                          "%s odls:construct_child_list unpacking data to launch job %s",
 569                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job)));
 570 
 571     /* if we are the HNP, we don't need to unpack this buffer - we already
 572      * have all the required info in our local job array. So just build the
 573      * array of local children
 574      */
 575     if (ORTE_PROC_IS_HNP) {
 576         /* we don't want/need the extra copy of the orte_job_t, but
 577          * we can't just release it as that will NULL the location in
 578          * the orte_job_data array. So set the jobid to INVALID to
 579          * protect the array, and then release the object to free
 580          * the storage */
 581         jdata->jobid = ORTE_JOBID_INVALID;
 582         OBJ_RELEASE(jdata);
 583         /* get the correct job object - it will be completely filled out */
 584         if (NULL == (jdata = orte_get_job_data_object(*job))) {
 585             ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 586             rc = ORTE_ERR_NOT_FOUND;
 587             goto REPORT_ERROR;
 588         }
 589     } else {
 590         opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
 591 
 592         /* ensure the map object is present */
 593         if (NULL == jdata->map) {
 594             jdata->map = OBJ_NEW(orte_job_map_t);
 595         }
 596     }
 597 
 598     /* if the job is fully described, then mpirun will have computed
 599      * and sent us the complete array of procs in the orte_job_t, so we
 600      * don't need to do anything more here */
 601     if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
 602         /* load the ppn info into the job and node arrays - the
 603          * function will ignore the data on the HNP as it already
 604          * has the info */
 605         if (ORTE_SUCCESS != (rc = orte_util_decode_ppn(jdata, buffer))) {
 606             ORTE_ERROR_LOG(rc);
 607             goto REPORT_ERROR;
 608         }
 609 
 610         if (!ORTE_PROC_IS_HNP) {
 611             /* assign locations to the procs */
 612             if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
 613                 ORTE_ERROR_LOG(rc);
 614                 goto REPORT_ERROR;
 615             }
 616         }
 617 
 618         /* compute the ranks and add the proc objects
 619          * to the jdata->procs array */
 620         if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
 621             ORTE_ERROR_LOG(rc);
 622             goto REPORT_ERROR;
 623         }
 624         /* and finally, compute the local and node ranks */
 625         if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
 626             ORTE_ERROR_LOG(rc);
 627             goto REPORT_ERROR;
 628         }
 629     }
 630 
 631     /* unpack the buffer containing any application setup info - there
 632      * might not be any, so it isn't an error if we don't find things */
 633     cnt=1;
 634     rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER);
 635     if (OPAL_SUCCESS == rc) {
 636         /* there was setup data - process it */
 637         cnt=1;
 638         OBJ_CONSTRUCT(&cache, opal_list_t);
 639         while (ORTE_SUCCESS == (rc = opal_dss.unpack(bptr, &kv, &cnt, OPAL_VALUE))) {
 640             /* if this is an envar operation, cache it in reverse order
 641              * so that the order the user provided is preserved */
 642             if (0 == strcmp(kv->key, OPAL_PMIX_SET_ENVAR) ||
 643                 0 == strcmp(kv->key, OPAL_PMIX_ADD_ENVAR) ||
 644                 0 == strcmp(kv->key, OPAL_PMIX_UNSET_ENVAR) ||
 645                 0 == strcmp(kv->key, OPAL_PMIX_PREPEND_ENVAR) ||
 646                 0 == strcmp(kv->key, OPAL_PMIX_APPEND_ENVAR)) {
 647                 opal_output_verbose(5, orte_odls_base_framework.framework_output,
 648                                     "ORTE:ODLS ADDING ENVAR %s", kv->data.envar.envar);
 649                 opal_list_prepend(&cache, &kv->super);
 650             } else {
 651                 /* need to pass it to pmix.setup_local_support */
 652                 opal_list_append(&local_support, &kv->super);
 653             }
 654         }
 655         OBJ_RELEASE(bptr);
 656         /* add any cache'd values  to the front of the job attributes  */
 657         while (NULL != (kv = (opal_value_t*)opal_list_remove_first(&cache))) {
 658             if (0 == strcmp(kv->key, OPAL_PMIX_SET_ENVAR)) {
 659                 orte_prepend_attribute(&jdata->attributes, ORTE_JOB_SET_ENVAR,
 660                                        ORTE_ATTR_GLOBAL, &kv->data.envar, OPAL_ENVAR);
 661             } else if (0 == strcmp(kv->key, OPAL_PMIX_ADD_ENVAR)) {
 662                 orte_prepend_attribute(&jdata->attributes, ORTE_JOB_ADD_ENVAR,
 663                                        ORTE_ATTR_GLOBAL, &kv->data.envar, OPAL_ENVAR);
 664             } else if (0 == strcmp(kv->key, OPAL_PMIX_UNSET_ENVAR)) {
 665                 orte_prepend_attribute(&jdata->attributes, ORTE_JOB_UNSET_ENVAR,
 666                                        ORTE_ATTR_GLOBAL, kv->data.string, OPAL_STRING);
 667             } else if (0 == strcmp(kv->key, OPAL_PMIX_PREPEND_ENVAR)) {
 668                 orte_prepend_attribute(&jdata->attributes, ORTE_JOB_PREPEND_ENVAR,
 669                                        ORTE_ATTR_GLOBAL, &kv->data.envar, OPAL_ENVAR);
 670             } else if (0 == strcmp(kv->key, OPAL_PMIX_APPEND_ENVAR)) {
 671                 orte_prepend_attribute(&jdata->attributes, ORTE_JOB_APPEND_ENVAR,
 672                                        ORTE_ATTR_GLOBAL, &kv->data.envar, OPAL_ENVAR);
 673             }
 674             OBJ_RELEASE(kv);
 675         }
 676         OPAL_LIST_DESTRUCT(&cache);
 677     }
 678 
 679     /* now that the node array in the job map and jdata are completely filled out,.
 680      * we need to "wireup" the procs to their nodes so other utilities can
 681      * locate them */
 682     for (n=0; n < jdata->procs->size; n++) {
 683         if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, n))) {
 684             continue;
 685         }
 686         if (ORTE_PROC_STATE_UNDEF == pptr->state) {
 687             /* not ready for use yet */
 688             continue;
 689         }
 690         if (!ORTE_PROC_IS_HNP &&
 691             orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
 692             /* the parser will have already made the connection, but the fully described
 693              * case won't have done it, so connect the proc to its node here */
 694             opal_output_verbose(5, orte_odls_base_framework.framework_output,
 695                                 "%s GETTING DAEMON FOR PROC %s WITH PARENT %s",
 696                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 697                                 ORTE_NAME_PRINT(&pptr->name),
 698                                 ORTE_VPID_PRINT(pptr->parent));
 699             if (ORTE_VPID_INVALID == pptr->parent) {
 700                 ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
 701                 rc = ORTE_ERR_BAD_PARAM;
 702                 goto REPORT_ERROR;
 703             }
 704             /* connect the proc to its node object */
 705             if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, pptr->parent))) {
 706                 ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 707                 rc = ORTE_ERR_NOT_FOUND;
 708                 goto REPORT_ERROR;
 709             }
 710             OBJ_RETAIN(dmn->node);
 711             pptr->node = dmn->node;
 712             /* add the node to the job map, if needed */
 713             if (!ORTE_FLAG_TEST(pptr->node, ORTE_NODE_FLAG_MAPPED)) {
 714                 OBJ_RETAIN(pptr->node);
 715                 opal_pointer_array_add(jdata->map->nodes, pptr->node);
 716                 jdata->map->num_nodes++;
 717                 ORTE_FLAG_SET(pptr->node, ORTE_NODE_FLAG_MAPPED);
 718             }
 719             /* add this proc to that node */
 720             OBJ_RETAIN(pptr);
 721             opal_pointer_array_add(pptr->node->procs, pptr);
 722             pptr->node->num_procs++;
 723         }
 724         /* see if it belongs to us */
 725         if (pptr->parent == ORTE_PROC_MY_NAME->vpid) {
 726             /* is this child on our current list of children */
 727             if (!ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_LOCAL)) {
 728                 /* not on the local list */
 729                 OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
 730                                      "%s[%s:%d] adding proc %s to my local list",
 731                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 732                                      __FILE__, __LINE__,
 733                                      ORTE_NAME_PRINT(&pptr->name)));
 734                 /* keep tabs of the number of local procs */
 735                 jdata->num_local_procs++;
 736                 /* add this proc to our child list */
 737                 OBJ_RETAIN(pptr);
 738                 ORTE_FLAG_SET(pptr, ORTE_PROC_FLAG_LOCAL);
 739                 opal_pointer_array_add(orte_local_children, pptr);
 740             }
 741 
 742             /* if the job is in restart mode, the child must not barrier when launched */
 743             if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
 744                 orte_set_attribute(&pptr->attributes, ORTE_PROC_NOBARRIER, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
 745             }
 746             /* mark that this app_context is being used on this node */
 747             app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx);
 748             ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE);
 749         }
 750     }
 751     if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
 752         /* reset the mapped flags */
 753         for (n=0; n < jdata->map->nodes->size; n++) {
 754             if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) {
 755                 ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
 756             }
 757         }
 758     }
 759 
 760     if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
 761         /* compute and save bindings of local children */
 762         if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
 763             ORTE_ERROR_LOG(rc);
 764             goto REPORT_ERROR;
 765         }
 766     }
 767 
 768     /* if we wanted to see the map, now is the time to display it */
 769     if (jdata->map->display_map) {
 770         orte_rmaps_base_display_map(jdata);
 771     }
 772 
 773     /* register this job with the PMIx server - need to wait until after we
 774      * have computed the #local_procs before calling the function */
 775     if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata, false))) {
 776         ORTE_ERROR_LOG(rc);
 777         goto REPORT_ERROR;
 778     }
 779 
 780     /* if we have local support setup info, then execute it here - we
 781      * have to do so AFTER we register the nspace so the PMIx server
 782      * has the nspace info it needs */
 783     if (0 < opal_list_get_size(&local_support) &&
 784         NULL != opal_pmix.server_setup_local_support) {
 785         if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_local_support(jdata->jobid, &local_support,
 786                                                                        ls_cbunc, &lock))) {
 787             ORTE_ERROR_LOG(rc);
 788             goto REPORT_ERROR;
 789         }
 790     } else {
 791         lock.active = false;  // we won't get a callback
 792     }
 793 
 794     /* load any controls into the job */
 795     orte_rtc.assign(jdata);
 796 
 797     /* spin up the spawn threads */
 798     orte_odls_base_start_threads(jdata);
 799 
 800     /* to save memory, purge the job map of all procs other than
 801      * our own - for daemons, this will completely release the
 802      * proc structures. For the HNP, the proc structs will
 803      * remain in the orte_job_t array */
 804 
 805     /* wait here until the local support has been setup */
 806     OPAL_PMIX_WAIT_THREAD(&lock);
 807     OPAL_PMIX_DESTRUCT_LOCK(&lock);
 808     OPAL_LIST_DESTRUCT(&local_support);
 809     return ORTE_SUCCESS;
 810 
 811   REPORT_ERROR:
 812     OPAL_PMIX_DESTRUCT_LOCK(&lock);
 813     OPAL_LIST_DESTRUCT(&local_support);
 814     /* we have to report an error back to the HNP so we don't just
 815      * hang. Although there shouldn't be any errors once this is
 816      * all debugged, it is still good practice to have a way
 817      * for it to happen - especially so developers don't have to
 818      * deal with the hang!
 819      */
 820     ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_NEVER_LAUNCHED);
 821     return rc;
 822 }
 823 
 824 static int setup_path(orte_app_context_t *app, char **wdir)
 825 {
 826     int rc=ORTE_SUCCESS;
 827     char dir[MAXPATHLEN];
 828 
 829     if (!orte_get_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, NULL, OPAL_BOOL)) {
 830         /* Try to change to the app's cwd and check that the app
 831            exists and is executable The function will
 832            take care of outputting a pretty error message, if required
 833         */
 834         if (ORTE_SUCCESS != (rc = orte_util_check_context_cwd(app, true))) {
 835             /* do not ERROR_LOG - it will be reported elsewhere */
 836             goto CLEANUP;
 837         }
 838 
 839         /* The prior function will have done a chdir() to jump us to
 840          * wherever the app is to be executed. This could be either where
 841          * the user specified (via -wdir), or to the user's home directory
 842          * on this node if nothing was provided. It seems that chdir doesn't
 843          * adjust the $PWD enviro variable when it changes the directory. This
 844          * can cause a user to get a different response when doing getcwd vs
 845          * looking at the enviro variable. To keep this consistent, we explicitly
 846          * ensure that the PWD enviro variable matches the CWD we moved to.
 847          *
 848          * NOTE: if a user's program does a chdir(), then $PWD will once
 849          * again not match getcwd! This is beyond our control - we are only
 850          * ensuring they start out matching.
 851          */
 852         if (NULL == getcwd(dir, sizeof(dir))) {
 853             return ORTE_ERR_OUT_OF_RESOURCE;
 854         }
 855         *wdir = strdup(dir);
 856         opal_setenv("PWD", dir, true, &app->env);
 857         /* update the initial wdir value too */
 858         opal_setenv(OPAL_MCA_PREFIX"initial_wdir", dir, true, &app->env);
 859     } else {
 860         *wdir = NULL;
 861     }
 862 
 863  CLEANUP:
 864     return rc;
 865 }
 866 
 867 
 868 /* define a timer release point so that we can wait for
 869  * file descriptors to come available, if necessary
 870  */
 871 static void timer_cb(int fd, short event, void *cbdata)
 872 {
 873     orte_timer_t *tm = (orte_timer_t*)cbdata;
 874     orte_odls_launch_local_t *ll = (orte_odls_launch_local_t*)tm->payload;
 875 
 876     ORTE_ACQUIRE_OBJECT(tm);
 877 
 878     /* increment the number of retries */
 879     ll->retries++;
 880 
 881     /* re-attempt the launch */
 882     opal_event_active(ll->ev, OPAL_EV_WRITE, 1);
 883 
 884     /* release the timer event */
 885     OBJ_RELEASE(tm);
 886 }
 887 
 888 static int compute_num_procs_alive(orte_jobid_t job)
 889 {
 890     int i;
 891     orte_proc_t *child;
 892     int num_procs_alive = 0;
 893 
 894     for (i=0; i < orte_local_children->size; i++) {
 895         if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
 896             continue;
 897         }
 898         if (!ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_ALIVE)) {
 899             continue;
 900         }
 901         /* do not include members of the specified job as they
 902          * will be added later, if required
 903          */
 904         if (job == child->name.jobid) {
 905             continue;
 906         }
 907         num_procs_alive++;
 908     }
 909     return num_procs_alive;
 910 }
 911 
 912 void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
 913 {
 914     orte_odls_spawn_caddy_t *cd = (orte_odls_spawn_caddy_t*)cbdata;
 915     orte_job_t *jobdat = cd->jdata;
 916     orte_app_context_t *app = cd->app;
 917     orte_proc_t *child = cd->child;
 918     int rc, i;
 919     bool found;
 920     orte_proc_state_t state;
 921 
 922     ORTE_ACQUIRE_OBJECT(cd);
 923 
 924     /* thread-protect common values */
 925     cd->env = opal_argv_copy(app->env);
 926 
 927     /* ensure we clear any prior info regarding state or exit status in
 928      * case this is a restart
 929      */
 930     child->exit_code = 0;
 931     ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_WAITPID);
 932 
 933     /* setup the pmix environment */
 934     if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_fork(&child->name, &cd->env))) {
 935         ORTE_ERROR_LOG(rc);
 936         state = ORTE_PROC_STATE_FAILED_TO_LAUNCH;
 937         goto errorout;
 938     }
 939 
 940     /* if we are not forwarding output for this job, then
 941      * flag iof as complete
 942      */
 943     if (ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) {
 944         ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_IOF_COMPLETE);
 945     } else {
 946         ORTE_FLAG_SET(child, ORTE_PROC_FLAG_IOF_COMPLETE);
 947     }
 948     child->pid = 0;
 949     if (NULL != child->rml_uri) {
 950         free(child->rml_uri);
 951         child->rml_uri = NULL;
 952     }
 953 
 954     /* setup the rest of the environment with the proc-specific items - these
 955      * will be overwritten for each child
 956      */
 957     if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app, &cd->env))) {
 958         ORTE_ERROR_LOG(rc);
 959         state = ORTE_PROC_STATE_FAILED_TO_LAUNCH;
 960         goto errorout;
 961     }
 962 
 963     /* did the user request we display output in xterms? */
 964     if (NULL != orte_xterm && !ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
 965         opal_list_item_t *nmitem;
 966         orte_namelist_t *nm;
 967         /* see if this rank is one of those requested */
 968         found = false;
 969         for (nmitem = opal_list_get_first(&orte_odls_globals.xterm_ranks);
 970              nmitem != opal_list_get_end(&orte_odls_globals.xterm_ranks);
 971              nmitem = opal_list_get_next(nmitem)) {
 972             nm = (orte_namelist_t*)nmitem;
 973             if (ORTE_VPID_WILDCARD == nm->name.vpid ||
 974                 child->name.vpid == nm->name.vpid) {
 975                 /* we want this one - modify the app's command to include
 976                  * the orte xterm cmd that starts with the xtermcmd */
 977                 cd->argv = opal_argv_copy(orte_odls_globals.xtermcmd);
 978                 /* insert the rank into the correct place as a window title */
 979                 free(cd->argv[2]);
 980                 opal_asprintf(&cd->argv[2], "Rank %s", ORTE_VPID_PRINT(child->name.vpid));
 981                 /* add in the argv from the app */
 982                 for (i=0; NULL != app->argv[i]; i++) {
 983                     opal_argv_append_nosize(&cd->argv, app->argv[i]);
 984                 }
 985                 /* use the xterm cmd as the app string */
 986                 cd->cmd = strdup(orte_odls_globals.xtermcmd[0]);
 987                 found = true;
 988                 break;
 989             } else if (jobdat->num_procs <= nm->name.vpid) {  /* check for bozo case */
 990                 /* can't be done! */
 991                 orte_show_help("help-orte-odls-base.txt",
 992                                "orte-odls-base:xterm-rank-out-of-bounds",
 993                                true, orte_process_info.nodename,
 994                                nm->name.vpid, jobdat->num_procs);
 995                 state = ORTE_PROC_STATE_FAILED_TO_LAUNCH;
 996                 goto errorout;
 997             }
 998         }
 999         if (!found) {
1000             cd->cmd = strdup(app->app);
1001             cd->argv = opal_argv_copy(app->argv);
1002         }
1003     } else if (NULL != orte_fork_agent) {
1004         /* we were given a fork agent - use it */
1005         cd->argv = opal_argv_copy(orte_fork_agent);
1006         /* add in the argv from the app */
1007         for (i=0; NULL != app->argv[i]; i++) {
1008             opal_argv_append_nosize(&cd->argv, app->argv[i]);
1009         }
1010         cd->cmd = opal_path_findv(orte_fork_agent[0], X_OK, orte_launch_environ, NULL);
1011         if (NULL == cd->cmd) {
1012             orte_show_help("help-orte-odls-base.txt",
1013                            "orte-odls-base:fork-agent-not-found",
1014                            true, orte_process_info.nodename, orte_fork_agent[0]);
1015             state = ORTE_PROC_STATE_FAILED_TO_LAUNCH;
1016             goto errorout;
1017         }
1018     } else {
1019         cd->cmd = strdup(app->app);
1020         cd->argv = opal_argv_copy(app->argv);
1021     }
1022 
1023     /* if we are indexing the argv by rank, do so now */
1024     if (cd->index_argv && !ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
1025         char *param;
1026         opal_asprintf(&param, "%s-%d", cd->argv[0], (int)child->name.vpid);
1027         free(cd->argv[0]);
1028         cd->argv[0] = param;
1029     }
1030 
1031     opal_output_verbose(5, orte_odls_base_framework.framework_output,
1032                         "%s odls:launch spawning child %s",
1033                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1034                         ORTE_NAME_PRINT(&child->name));
1035 
1036     if (15 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) {
1037         /* dump what is going to be exec'd */
1038         opal_dss.dump(orte_odls_base_framework.framework_output, app, ORTE_APP_CONTEXT);
1039     }
1040 
1041     if (ORTE_SUCCESS != (rc = cd->fork_local(cd))) {
1042         /* error message already output */
1043         state = ORTE_PROC_STATE_FAILED_TO_START;
1044         goto errorout;
1045     }
1046 
1047     ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_RUNNING);
1048     OBJ_RELEASE(cd);
1049     return;
1050 
1051   errorout:
1052     ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE);
1053     child->exit_code = rc;
1054     ORTE_ACTIVATE_PROC_STATE(&child->name, state);
1055     OBJ_RELEASE(cd);
1056 }
1057 
1058 void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
1059 {
1060     orte_app_context_t *app;
1061     orte_proc_t *child=NULL;
1062     int rc=ORTE_SUCCESS;
1063     char basedir[MAXPATHLEN];
1064     int j, idx;
1065     int total_num_local_procs = 0;
1066     orte_odls_launch_local_t *caddy = (orte_odls_launch_local_t*)cbdata;
1067     orte_job_t *jobdat;
1068     orte_jobid_t job = caddy->job;
1069     orte_odls_base_fork_local_proc_fn_t fork_local = caddy->fork_local;
1070     bool index_argv;
1071     char *msg;
1072     orte_odls_spawn_caddy_t *cd;
1073     opal_event_base_t *evb;
1074     char *effective_dir = NULL;
1075     char **argvptr;
1076     char *pathenv = NULL, *mpiexec_pathenv = NULL;
1077     char *full_search;
1078 
1079     ORTE_ACQUIRE_OBJECT(caddy);
1080 
1081     opal_output_verbose(5, orte_odls_base_framework.framework_output,
1082                         "%s local:launch",
1083                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
1084 
1085     /* establish our baseline working directory - we will be potentially
1086      * bouncing around as we execute various apps, but we will always return
1087      * to this place as our default directory
1088      */
1089     if (NULL == getcwd(basedir, sizeof(basedir))) {
1090         ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FAILED_TO_LAUNCH);
1091         goto ERROR_OUT;
1092     }
1093     /* find the jobdat for this job */
1094     if (NULL == (jobdat = orte_get_job_data_object(job))) {
1095         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
1096         /* not much we can do here - we are just hosed, so
1097          * report that to the error manager
1098          */
1099         ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FAILED_TO_LAUNCH);
1100         goto ERROR_OUT;
1101     }
1102 
1103     /* do we have any local procs to launch? */
1104     if (0 == jobdat->num_local_procs) {
1105         /* indicate that we are done trying to launch them */
1106         opal_output_verbose(5, orte_odls_base_framework.framework_output,
1107                             "%s local:launch no local procs",
1108                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
1109         goto GETOUT;
1110     }
1111 
1112     /* track if we are indexing argvs so we don't check every time */
1113     index_argv = orte_get_attribute(&jobdat->attributes, ORTE_JOB_INDEX_ARGV, NULL, OPAL_BOOL);
1114 
1115     /* compute the total number of local procs currently alive and about to be launched */
1116     total_num_local_procs = compute_num_procs_alive(job) + jobdat->num_local_procs;
1117 
1118     /* check the system limits - if we are at our max allowed children, then
1119      * we won't be allowed to do this anyway, so we may as well abort now.
1120      * According to the documentation, num_procs = 0 is equivalent to
1121      * no limit, so treat it as unlimited here.
1122      */
1123     if (0 < opal_sys_limits.num_procs) {
1124         OPAL_OUTPUT_VERBOSE((10,  orte_odls_base_framework.framework_output,
1125                              "%s checking limit on num procs %d #children needed %d",
1126                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1127                              opal_sys_limits.num_procs, total_num_local_procs));
1128         if (opal_sys_limits.num_procs < total_num_local_procs) {
1129             if (2 < caddy->retries) {
1130                 /* if we have already tried too many times, then just give up */
1131                 ORTE_ACTIVATE_JOB_STATE(jobdat, ORTE_JOB_STATE_FAILED_TO_LAUNCH);
1132                 goto ERROR_OUT;
1133             }
1134             /* set a timer event so we can retry later - this
1135              * gives the system a chance to let other procs
1136              * terminate, thus creating room for new ones
1137              */
1138             ORTE_DETECT_TIMEOUT(1000, 1000, -1, timer_cb, caddy);
1139             return;
1140         }
1141     }
1142 
1143     /* check to see if we have enough available file descriptors
1144      * to launch these children - if not, then let's wait a little
1145      * while to see if some come free. This can happen if we are
1146      * in a tight loop over comm_spawn
1147      */
1148     if (0 < opal_sys_limits.num_files) {
1149         int limit;
1150         limit = 4*total_num_local_procs + 6*jobdat->num_local_procs;
1151         OPAL_OUTPUT_VERBOSE((10,  orte_odls_base_framework.framework_output,
1152                              "%s checking limit on file descriptors %d need %d",
1153                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1154                              opal_sys_limits.num_files, limit));
1155         if (opal_sys_limits.num_files < limit) {
1156             if (2 < caddy->retries) {
1157                 /* tried enough - give up */
1158                 for (idx=0; idx < orte_local_children->size; idx++) {
1159                     if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) {
1160                         continue;
1161                     }
1162                     if (OPAL_EQUAL == opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID)) {
1163                         child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH;
1164                         ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
1165                     }
1166                 }
1167                 goto ERROR_OUT;
1168             }
1169             /* don't have enough - wait a little time */
1170             ORTE_DETECT_TIMEOUT(1000, 1000, -1, timer_cb, caddy);
1171             return;
1172         }
1173     }
1174 
1175     for (j=0; j < jobdat->apps->size; j++) {
1176         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, j))) {
1177             continue;
1178         }
1179 
1180         /* if this app isn't being used on our node, skip it */
1181         if (!ORTE_FLAG_TEST(app, ORTE_APP_FLAG_USED_ON_NODE)) {
1182             opal_output_verbose(5, orte_odls_base_framework.framework_output,
1183                                 "%s app %d not used on node",
1184                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
1185             continue;
1186         }
1187 
1188         /* setup the environment for this app */
1189         if (ORTE_SUCCESS != (rc = orte_schizo.setup_fork(jobdat, app))) {
1190 
1191             OPAL_OUTPUT_VERBOSE((10, orte_odls_base_framework.framework_output,
1192                                  "%s odls:launch:setup_fork failed with error %s",
1193                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1194                                  ORTE_ERROR_NAME(rc)));
1195 
1196             /* do not ERROR_LOG this failure - it will be reported
1197              * elsewhere. The launch is going to fail. Since we could have
1198              * multiple app_contexts, we need to ensure that we flag only
1199              * the correct one that caused this operation to fail. We then have
1200              * to flag all the other procs from the app_context as having "not failed"
1201              * so we can report things out correctly
1202              */
1203             /* cycle through children to find those for this jobid */
1204             for (idx=0; idx < orte_local_children->size; idx++) {
1205                 if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) {
1206                     continue;
1207                 }
1208                 if (OPAL_EQUAL == opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID) &&
1209                     j == (int)child->app_idx) {
1210                     child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH;
1211                     ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
1212                 }
1213             }
1214             goto GETOUT;
1215         }
1216 
1217         /* setup the working directory for this app - will jump us
1218          * to that directory
1219          */
1220         if (ORTE_SUCCESS != (rc = setup_path(app, &effective_dir))) {
1221             OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1222                                  "%s odls:launch:setup_path failed with error %s(%d)",
1223                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1224                                  ORTE_ERROR_NAME(rc), rc));
1225             /* do not ERROR_LOG this failure - it will be reported
1226              * elsewhere. The launch is going to fail. Since we could have
1227              * multiple app_contexts, we need to ensure that we flag only
1228              * the correct one that caused this operation to fail. We then have
1229              * to flag all the other procs from the app_context as having "not failed"
1230              * so we can report things out correctly
1231              */
1232             /* cycle through children to find those for this jobid */
1233             for (idx=0; idx < orte_local_children->size; idx++) {
1234                 if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) {
1235                     continue;
1236                 }
1237                 if (OPAL_EQUAL == opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID) &&
1238                     j == (int)child->app_idx) {
1239                     child->exit_code = rc;
1240                     ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
1241                 }
1242             }
1243             goto GETOUT;
1244         }
1245 
1246         /* setup any local files that were prepositioned for us */
1247         if (ORTE_SUCCESS != (rc = orte_filem.link_local_files(jobdat, app))) {
1248             /* cycle through children to find those for this jobid */
1249             for (idx=0; idx < orte_local_children->size; idx++) {
1250                 if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) {
1251                     continue;
1252                 }
1253                 if (OPAL_EQUAL == opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID) &&
1254                     j == (int)child->app_idx) {
1255                     child->exit_code = rc;
1256                     ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
1257                 }
1258             }
1259             goto GETOUT;
1260         }
1261 
1262         /* Search for the OMPI_exec_path and PATH settings in the environment. */
1263         for (argvptr = app->env; *argvptr != NULL; argvptr++) {
1264             if (0 == strncmp("OMPI_exec_path=", *argvptr, 15)) {
1265                 mpiexec_pathenv = *argvptr + 15;
1266             }
1267             if (0 == strncmp("PATH=", *argvptr, 5)) {
1268                 pathenv = *argvptr + 5;
1269             }
1270         }
1271 
1272         /* If OMPI_exec_path is set (meaning --path was used), then create a
1273            temporary environment to be used in the search for the executable.
1274            The PATH setting in this temporary environment is a combination of
1275            the OMPI_exec_path and PATH values.  If OMPI_exec_path is not set,
1276            then just use existing environment with PATH in it.  */
1277         if (NULL != mpiexec_pathenv) {
1278             argvptr = NULL;
1279             if (pathenv != NULL) {
1280                 opal_asprintf(&full_search, "%s:%s", mpiexec_pathenv, pathenv);
1281             } else {
1282                 opal_asprintf(&full_search, "%s", mpiexec_pathenv);
1283             }
1284             opal_setenv("PATH", full_search, true, &argvptr);
1285             free(full_search);
1286         } else {
1287             argvptr = app->env;
1288         }
1289 
1290         rc = orte_util_check_context_app(app, argvptr);
1291         /* do not ERROR_LOG - it will be reported elsewhere */
1292         if (NULL != mpiexec_pathenv) {
1293             opal_argv_free(argvptr);
1294         }
1295         if (ORTE_SUCCESS != rc) {
1296             /* cycle through children to find those for this jobid */
1297             for (idx=0; idx < orte_local_children->size; idx++) {
1298                 if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) {
1299                     continue;
1300                 }
1301                 if (OPAL_EQUAL == opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID) &&
1302                     j == (int)child->app_idx) {
1303                     child->exit_code = rc;
1304                     ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
1305                 }
1306             }
1307             goto GETOUT;
1308         }
1309 
1310 
1311         /* tell all children that they are being launched via ORTE */
1312         opal_setenv(OPAL_MCA_PREFIX"orte_launch", "1", true, &app->env);
1313 
1314         /* if the user requested it, set the system resource limits */
1315         if (OPAL_SUCCESS != (rc = opal_util_init_sys_limits(&msg))) {
1316             orte_show_help("help-orte-odls-default.txt", "set limit", true,
1317                            orte_process_info.nodename, app,
1318                            __FILE__, __LINE__, msg);
1319             /* cycle through children to find those for this jobid */
1320             for (idx=0; idx < orte_local_children->size; idx++) {
1321                 if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) {
1322                     continue;
1323                 }
1324                 if (OPAL_EQUAL == opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID) &&
1325                     j == (int)child->app_idx) {
1326                     child->exit_code = rc;
1327                     ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
1328                 }
1329             }
1330             goto GETOUT;
1331         }
1332 
1333         /* reset our working directory back to our default location - if we
1334          * don't do this, then we will be looking for relative paths starting
1335          * from the last wdir option specified by the user. Thus, we would
1336          * be requiring that the user keep track on the cmd line of where
1337          * each app was located relative to the prior app, instead of relative
1338          * to their current location
1339          */
1340         if (0 != chdir(basedir)) {
1341             ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
1342             goto GETOUT;
1343         }
1344 
1345         /* okay, now let's launch all the local procs for this app using the provided fork_local fn */
1346         for (idx=0; idx < orte_local_children->size; idx++) {
1347             if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) {
1348                 continue;
1349             }
1350             /* does this child belong to this app? */
1351             if (j != (int)child->app_idx) {
1352                 continue;
1353             }
1354 
1355             /* is this child already alive? This can happen if
1356              * we are asked to launch additional processes.
1357              * If it has been launched, then do nothing
1358              */
1359             if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_ALIVE)) {
1360 
1361                 OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1362                                      "%s odls:launch child %s has already been launched",
1363                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1364                                      ORTE_NAME_PRINT(&child->name)));
1365 
1366                 continue;
1367             }
1368             /* is this child a candidate to start? it may not be alive
1369              * because it already executed
1370              */
1371             if (ORTE_PROC_STATE_INIT != child->state &&
1372                 ORTE_PROC_STATE_RESTART != child->state) {
1373                 continue;
1374             }
1375             /* do we have a child from the specified job. Because the
1376              * job could be given as a WILDCARD value, we must use
1377              * the dss.compare function to check for equality.
1378              */
1379             if (OPAL_EQUAL != opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID)) {
1380 
1381                 OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1382                                      "%s odls:launch child %s is not in job %s being launched",
1383                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1384                                      ORTE_NAME_PRINT(&child->name),
1385                                      ORTE_JOBID_PRINT(job)));
1386 
1387                 continue;
1388             }
1389 
1390             OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1391                                  "%s odls:launch working child %s",
1392                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1393                                  ORTE_NAME_PRINT(&child->name)));
1394 
1395             /* determine the thread that will handle this child */
1396             ++orte_odls_globals.next_base;
1397             if (orte_odls_globals.num_threads <= orte_odls_globals.next_base) {
1398                 orte_odls_globals.next_base = 0;
1399             }
1400             evb = orte_odls_globals.ev_bases[orte_odls_globals.next_base];
1401 
1402             /* set the waitpid callback here for thread protection and
1403              * to ensure we can capture the callback on shortlived apps */
1404             ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE);
1405             orte_wait_cb(child, orte_odls_base_default_wait_local_proc, evb, NULL);
1406 
1407             /* dispatch this child to the next available launch thread */
1408             cd = OBJ_NEW(orte_odls_spawn_caddy_t);
1409             if (NULL != effective_dir) {
1410                 cd->wdir = strdup(effective_dir);
1411             }
1412             cd->jdata = jobdat;
1413             cd->app = app;
1414             cd->child = child;
1415             cd->fork_local = fork_local;
1416             cd->index_argv = index_argv;
1417             /* setup any IOF */
1418             cd->opts.usepty = OPAL_ENABLE_PTY_SUPPORT;
1419 
1420             /* do we want to setup stdin? */
1421             if (jobdat->stdin_target == ORTE_VPID_WILDCARD ||
1422                  child->name.vpid == jobdat->stdin_target) {
1423                 cd->opts.connect_stdin = true;
1424             } else {
1425                 cd->opts.connect_stdin = false;
1426             }
1427             if (ORTE_SUCCESS != (rc = orte_iof_base_setup_prefork(&cd->opts))) {
1428                 ORTE_ERROR_LOG(rc);
1429                 child->exit_code = rc;
1430                 OBJ_RELEASE(cd);
1431                 ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
1432                 goto GETOUT;
1433             }
1434             if (ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) {
1435                 /* connect endpoints IOF */
1436                 rc = orte_iof_base_setup_parent(&child->name, &cd->opts);
1437                 if (ORTE_SUCCESS != rc) {
1438                     ORTE_ERROR_LOG(rc);
1439                     OBJ_RELEASE(cd);
1440                     ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
1441                     goto GETOUT;
1442                 }
1443             }
1444             opal_output_verbose(1, orte_odls_base_framework.framework_output,
1445                                 "%s odls:dispatch %s to thread %d",
1446                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1447                                 ORTE_NAME_PRINT(&child->name),
1448                                 orte_odls_globals.next_base);
1449             opal_event_set(evb, &cd->ev, -1,
1450                            OPAL_EV_WRITE, orte_odls_base_spawn_proc, cd);
1451             opal_event_set_priority(&cd->ev, ORTE_MSG_PRI);
1452             opal_event_active(&cd->ev, OPAL_EV_WRITE, 1);
1453 
1454         }
1455         if (NULL != effective_dir) {
1456             free(effective_dir);
1457             effective_dir = NULL;
1458         }
1459     }
1460 
1461   GETOUT:
1462     if (NULL != effective_dir) {
1463         free(effective_dir);
1464         effective_dir = NULL;
1465     }
1466 
1467   ERROR_OUT:
1468     /* ensure we reset our working directory back to our default location  */
1469     if (0 != chdir(basedir)) {
1470         ORTE_ERROR_LOG(ORTE_ERROR);
1471     }
1472     /* release the event */
1473     OBJ_RELEASE(caddy);
1474 }
1475 
1476 /**
1477 *  Pass a signal to my local procs
1478  */
1479 
1480 int orte_odls_base_default_signal_local_procs(const orte_process_name_t *proc, int32_t signal,
1481                                               orte_odls_base_signal_local_fn_t signal_local)
1482 {
1483     int rc, i;
1484     orte_proc_t *child;
1485 
1486     OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1487                          "%s odls: signaling proc %s",
1488                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1489                          (NULL == proc) ? "NULL" : ORTE_NAME_PRINT(proc)));
1490 
1491     /* if procs is NULL, then we want to signal all
1492      * of the local procs, so just do that case
1493      */
1494     if (NULL == proc) {
1495         rc = ORTE_SUCCESS;  /* pre-set this as an empty list causes us to drop to bottom */
1496         for (i=0; i < orte_local_children->size; i++) {
1497             if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
1498                 continue;
1499             }
1500             if (0 == child->pid || !ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_ALIVE)) {
1501                 /* skip this one as the child isn't alive */
1502                 continue;
1503             }
1504             if (ORTE_SUCCESS != (rc = signal_local(child->pid, (int)signal))) {
1505                 ORTE_ERROR_LOG(rc);
1506             }
1507         }
1508         return rc;
1509     }
1510 
1511     /* we want it sent to some specified process, so find it */
1512     for (i=0; i < orte_local_children->size; i++) {
1513         if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
1514             continue;
1515         }
1516         if (OPAL_EQUAL == opal_dss.compare(&(child->name), (orte_process_name_t*)proc, ORTE_NAME)) {
1517             if (ORTE_SUCCESS != (rc = signal_local(child->pid, (int)signal))) {
1518                 ORTE_ERROR_LOG(rc);
1519             }
1520             return rc;
1521         }
1522     }
1523 
1524     /* only way to get here is if we couldn't find the specified proc.
1525      * report that as an error and return it
1526      */
1527     ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
1528     return ORTE_ERR_NOT_FOUND;
1529 }
1530 
1531 /*
1532  *  Wait for a callback indicating the child has completed.
1533  */
1534 
1535 void orte_odls_base_default_wait_local_proc(int fd, short sd, void *cbdata)
1536 {
1537     orte_wait_tracker_t *t2 = (orte_wait_tracker_t*)cbdata;
1538     orte_proc_t *proc = t2->child;
1539     int i;
1540     orte_job_t *jobdat;
1541     orte_proc_state_t state=ORTE_PROC_STATE_WAITPID_FIRED;
1542     orte_proc_t *cptr;
1543 
1544     opal_output_verbose(5, orte_odls_base_framework.framework_output,
1545                         "%s odls:wait_local_proc child process %s pid %ld terminated",
1546                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1547                         ORTE_NAME_PRINT(&proc->name), (long)proc->pid);
1548 
1549     /* if the child was previously flagged as dead, then just
1550      * update its exit status and
1551      * ensure that its exit state gets reported to avoid hanging
1552      * don't forget to check if the process was signaled.
1553      */
1554     if (!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_ALIVE)) {
1555         OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1556                              "%s odls:waitpid_fired child %s was already dead exit code %d",
1557                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1558                              ORTE_NAME_PRINT(&proc->name),proc->exit_code));
1559         if (WIFEXITED(proc->exit_code)) {
1560             proc->exit_code = WEXITSTATUS(proc->exit_code);
1561             if (0 != proc->exit_code) {
1562                 state = ORTE_PROC_STATE_TERM_NON_ZERO;
1563             }
1564         } else {
1565             if (WIFSIGNALED(proc->exit_code)) {
1566                 state = ORTE_PROC_STATE_ABORTED_BY_SIG;
1567                 proc->exit_code = WTERMSIG(proc->exit_code) + 128;
1568             }
1569         }
1570         goto MOVEON;
1571     }
1572 
1573     /* if the proc called "abort", then we just need to flag that it
1574      * came thru here */
1575     if (ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_ABORT)) {
1576         /* even though the process exited "normally", it happened
1577          * via an orte_abort call
1578          */
1579         OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1580                              "%s odls:waitpid_fired child %s died by call to abort",
1581                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1582                              ORTE_NAME_PRINT(&proc->name)));
1583         state = ORTE_PROC_STATE_CALLED_ABORT;
1584         /* regardless of our eventual code path, we need to
1585          * flag that this proc has had its waitpid fired */
1586         ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_WAITPID);
1587         goto MOVEON;
1588     }
1589 
1590     /* get the jobdat for this child */
1591     if (NULL == (jobdat = orte_get_job_data_object(proc->name.jobid))) {
1592         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
1593         goto MOVEON;
1594     }
1595 
1596     /* if this is a debugger daemon, then just report the state
1597      * and return as we aren't monitoring it
1598      */
1599     if (ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON))  {
1600         goto MOVEON;
1601     }
1602 
1603     /* if this child was ordered to die, then just pass that along
1604      * so we don't hang
1605      */
1606     if (ORTE_PROC_STATE_KILLED_BY_CMD == proc->state) {
1607         OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1608                              "%s odls:waitpid_fired child %s was ordered to die",
1609                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1610                              ORTE_NAME_PRINT(&proc->name)));
1611         /* regardless of our eventual code path, we need to
1612          * flag that this proc has had its waitpid fired */
1613         ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_WAITPID);
1614         goto MOVEON;
1615     }
1616 
1617     /* determine the state of this process */
1618     if (WIFEXITED(proc->exit_code)) {
1619 
1620         /* set the exit status appropriately */
1621         proc->exit_code = WEXITSTATUS(proc->exit_code);
1622 
1623         OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1624                              "%s odls:waitpid_fired child %s exit code %d",
1625                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1626                              ORTE_NAME_PRINT(&proc->name), proc->exit_code));
1627 
1628         /* provide a default state */
1629         state = ORTE_PROC_STATE_WAITPID_FIRED;
1630 
1631         /* check to see if a sync was required and if it was received */
1632         if (ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_REG)) {
1633             if (ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_HAS_DEREG) ||
1634                 orte_allowed_exit_without_sync || 0 != proc->exit_code) {
1635                 /* if we did recv a finalize sync, or one is not required,
1636                  * then declare it normally terminated
1637                  * unless it returned with a non-zero status indicating the code
1638                  * felt it was non-normal - in this latter case, we do not
1639                  * require that the proc deregister before terminating
1640                  */
1641                 if (0 != proc->exit_code && orte_abort_non_zero_exit) {
1642                     state = ORTE_PROC_STATE_TERM_NON_ZERO;
1643                     OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1644                                          "%s odls:waitpid_fired child process %s terminated normally "
1645                                          "but with a non-zero exit status - it "
1646                                          "will be treated as an abnormal termination",
1647                                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1648                                          ORTE_NAME_PRINT(&proc->name)));
1649                 } else {
1650                     /* indicate the waitpid fired */
1651                     state = ORTE_PROC_STATE_WAITPID_FIRED;
1652                 }
1653             } else {
1654                 /* we required a finalizing sync and didn't get it, so this
1655                  * is considered an abnormal termination and treated accordingly
1656                  */
1657                 state = ORTE_PROC_STATE_TERM_WO_SYNC;
1658                 OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1659                                      "%s odls:waitpid_fired child process %s terminated normally "
1660                                      "but did not provide a required finalize sync - it "
1661                                      "will be treated as an abnormal termination",
1662                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1663                                      ORTE_NAME_PRINT(&proc->name)));
1664             }
1665         } else {
1666             /* has any child in this job already registered? */
1667             for (i=0; i < orte_local_children->size; i++) {
1668                 if (NULL == (cptr = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
1669                     continue;
1670                 }
1671                 if (cptr->name.jobid != proc->name.jobid) {
1672                     continue;
1673                 }
1674                 if (ORTE_FLAG_TEST(cptr, ORTE_PROC_FLAG_REG) && !orte_allowed_exit_without_sync) {
1675                     /* someone has registered, and we didn't before
1676                      * terminating - this is an abnormal termination unless
1677                      * the allowed_exit_without_sync flag is set
1678                      */
1679                     if (0 != proc->exit_code) {
1680                         state = ORTE_PROC_STATE_TERM_NON_ZERO;
1681                         OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1682                                              "%s odls:waitpid_fired child process %s terminated normally "
1683                                              "but with a non-zero exit status - it "
1684                                              "will be treated as an abnormal termination",
1685                                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1686                                              ORTE_NAME_PRINT(&proc->name)));
1687                     } else {
1688                         state = ORTE_PROC_STATE_TERM_WO_SYNC;
1689                         OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1690                                              "%s odls:waitpid_fired child process %s terminated normally "
1691                                              "but did not provide a required init sync - it "
1692                                              "will be treated as an abnormal termination",
1693                                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1694                                              ORTE_NAME_PRINT(&proc->name)));
1695                     }
1696                     goto MOVEON;
1697                 }
1698             }
1699             /* if no child has registered, then it is possible that
1700              * none of them will. This is considered acceptable. Still
1701              * flag it as abnormal if the exit code was non-zero
1702              */
1703             if (0 != proc->exit_code && orte_abort_non_zero_exit) {
1704                 state = ORTE_PROC_STATE_TERM_NON_ZERO;
1705             } else {
1706                 state = ORTE_PROC_STATE_WAITPID_FIRED;
1707             }
1708         }
1709 
1710         OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1711                              "%s odls:waitpid_fired child process %s terminated %s",
1712                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1713                              ORTE_NAME_PRINT(&proc->name),
1714                              (0 == proc->exit_code) ? "normally" : "with non-zero status"));
1715     } else {
1716         /* the process was terminated with a signal! That's definitely
1717          * abnormal, so indicate that condition
1718          */
1719         state = ORTE_PROC_STATE_ABORTED_BY_SIG;
1720         /* If a process was killed by a signal, then make the
1721          * exit code of orterun be "signo + 128" so that "prog"
1722          * and "orterun prog" will both yield the same exit code.
1723          *
1724          * This is actually what the shell does for you when
1725          * a process dies by signal, so this makes orterun treat
1726          * the termination code to exit status translation the
1727          * same way
1728          */
1729         proc->exit_code = WTERMSIG(proc->exit_code) + 128;
1730 
1731         OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1732                              "%s odls:waitpid_fired child process %s terminated with signal",
1733                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1734                              ORTE_NAME_PRINT(&proc->name) ));
1735         /* Do not decrement the number of local procs here. That is handled in the errmgr */
1736     }
1737 
1738  MOVEON:
1739     /* cancel the wait as this proc has already terminated */
1740     orte_wait_cb_cancel(proc);
1741     ORTE_ACTIVATE_PROC_STATE(&proc->name, state);
1742     /* cleanup the tracker */
1743     OBJ_RELEASE(t2);
1744 }
1745 
1746 typedef struct {
1747     opal_list_item_t super;
1748     orte_proc_t *child;
1749 } orte_odls_quick_caddy_t;
1750 static void qcdcon(orte_odls_quick_caddy_t *p)
1751 {
1752     p->child = NULL;
1753 }
1754 static void qcddes(orte_odls_quick_caddy_t *p)
1755 {
1756     if (NULL != p->child) {
1757         OBJ_RELEASE(p->child);
1758     }
1759 }
1760 OBJ_CLASS_INSTANCE(orte_odls_quick_caddy_t,
1761                    opal_list_item_t,
1762                    qcdcon, qcddes);
1763 
1764 int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
1765                                             orte_odls_base_kill_local_fn_t kill_local)
1766 {
1767     orte_proc_t *child;
1768     opal_list_t procs_killed;
1769     orte_proc_t *proc, proctmp;
1770     int i, j;
1771     opal_pointer_array_t procarray, *procptr;
1772     bool do_cleanup;
1773     orte_odls_quick_caddy_t *cd;
1774 
1775     OBJ_CONSTRUCT(&procs_killed, opal_list_t);
1776 
1777     /* if the pointer array is NULL, then just kill everything */
1778     if (NULL == procs) {
1779         OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1780                              "%s odls:kill_local_proc working on WILDCARD",
1781                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1782         OBJ_CONSTRUCT(&procarray, opal_pointer_array_t);
1783         opal_pointer_array_init(&procarray, 1, 1, 1);
1784         OBJ_CONSTRUCT(&proctmp, orte_proc_t);
1785         proctmp.name.jobid = ORTE_JOBID_WILDCARD;
1786         proctmp.name.vpid = ORTE_VPID_WILDCARD;
1787         opal_pointer_array_add(&procarray, &proctmp);
1788         procptr = &procarray;
1789         do_cleanup = true;
1790     } else {
1791         OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1792                              "%s odls:kill_local_proc working on provided array",
1793                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1794         procptr = procs;
1795         do_cleanup = false;
1796     }
1797 
1798     /* cycle through the provided array of processes to kill */
1799     for (i=0; i < procptr->size; i++) {
1800         if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(procptr, i))) {
1801             continue;
1802         }
1803         for (j=0; j < orte_local_children->size; j++) {
1804             if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, j))) {
1805                 continue;
1806             }
1807 
1808             OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1809                                  "%s odls:kill_local_proc checking child process %s",
1810                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1811                                  ORTE_NAME_PRINT(&child->name)));
1812 
1813             /* do we have a child from the specified job? Because the
1814              *  job could be given as a WILDCARD value, we must
1815              *  check for that as well as for equality.
1816              */
1817             if (ORTE_JOBID_WILDCARD != proc->name.jobid &&
1818                 proc->name.jobid != child->name.jobid) {
1819 
1820                 OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1821                                      "%s odls:kill_local_proc child %s is not part of job %s",
1822                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1823                                      ORTE_NAME_PRINT(&child->name),
1824                                      ORTE_JOBID_PRINT(proc->name.jobid)));
1825                 continue;
1826             }
1827 
1828             /* see if this is the specified proc - could be a WILDCARD again, so check
1829              * appropriately
1830              */
1831             if (ORTE_VPID_WILDCARD != proc->name.vpid &&
1832                 proc->name.vpid != child->name.vpid) {
1833 
1834                 OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1835                                      "%s odls:kill_local_proc child %s is not covered by rank %s",
1836                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1837                                      ORTE_NAME_PRINT(&child->name),
1838                                      ORTE_VPID_PRINT(proc->name.vpid)));
1839                 continue;
1840             }
1841 
1842             /* is this process alive? if not, then nothing for us
1843              * to do to it
1844              */
1845             if (!ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_ALIVE) || 0 == child->pid) {
1846 
1847                 OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1848                                      "%s odls:kill_local_proc child %s is not alive",
1849                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1850                                      ORTE_NAME_PRINT(&child->name)));
1851 
1852                 /* ensure, though, that the state is terminated so we don't lockup if
1853                  * the proc never started
1854                  */
1855                 if (ORTE_PROC_STATE_UNDEF == child->state ||
1856                     ORTE_PROC_STATE_INIT == child->state ||
1857                     ORTE_PROC_STATE_RUNNING == child->state) {
1858                     /* we can't be sure what happened, but make sure we
1859                      * at least have a value that will let us eventually wakeup
1860                      */
1861                     child->state = ORTE_PROC_STATE_TERMINATED;
1862                     /* ensure we realize that the waitpid will never come, if
1863                      * it already hasn't
1864                      */
1865                     ORTE_FLAG_SET(child, ORTE_PROC_FLAG_WAITPID);
1866                     child->pid = 0;
1867                     goto CLEANUP;
1868                 } else {
1869                     continue;
1870                 }
1871             }
1872 
1873             /* ensure the stdin IOF channel for this child is closed. The other
1874              * channels will automatically close when the proc is killed
1875              */
1876             if (NULL != orte_iof.close) {
1877                 orte_iof.close(&child->name, ORTE_IOF_STDIN);
1878             }
1879 
1880             /* cancel the waitpid callback as this induces unmanageable race
1881              * conditions when we are deliberately killing the process
1882              */
1883             orte_wait_cb_cancel(child);
1884 
1885             /* First send a SIGCONT in case the process is in stopped state.
1886                If it is in a stopped state and we do not first change it to
1887                running, then SIGTERM will not get delivered.  Ignore return
1888                value. */
1889             OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1890                                  "%s SENDING SIGCONT TO %s",
1891                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1892                                  ORTE_NAME_PRINT(&child->name)));
1893             cd = OBJ_NEW(orte_odls_quick_caddy_t);
1894             OBJ_RETAIN(child);
1895             cd->child = child;
1896             opal_list_append(&procs_killed, &cd->super);
1897             kill_local(child->pid, SIGCONT);
1898             continue;
1899 
1900         CLEANUP:
1901             /* ensure the child's session directory is cleaned up */
1902             orte_session_dir_finalize(&child->name);
1903             /* check for everything complete - this will remove
1904              * the child object from our local list
1905              */
1906             if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
1907                 ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_WAITPID)) {
1908                 ORTE_ACTIVATE_PROC_STATE(&child->name, child->state);
1909             }
1910         }
1911     }
1912 
1913     /* if we are issuing signals, then we need to wait a little
1914      * and send the next in sequence */
1915     if (0 < opal_list_get_size(&procs_killed)) {
1916         sleep(orte_odls_globals.timeout_before_sigkill);
1917         /* issue a SIGTERM to all */
1918         OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) {
1919             OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1920                                  "%s SENDING SIGTERM TO %s",
1921                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1922                                  ORTE_NAME_PRINT(&cd->child->name)));
1923             kill_local(cd->child->pid, SIGTERM);
1924         }
1925         /* wait a little again */
1926         sleep(orte_odls_globals.timeout_before_sigkill);
1927         /* issue a SIGKILL to all */
1928         OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) {
1929             OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1930                                  "%s SENDING SIGKILL TO %s",
1931                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1932                                  ORTE_NAME_PRINT(&cd->child->name)));
1933             kill_local(cd->child->pid, SIGKILL);
1934             /* indicate the waitpid fired as this is effectively what
1935              * has happened
1936              */
1937             ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_WAITPID);
1938 
1939             /* Since we are not going to wait for this process, make sure
1940              * we mark it as not-alive so that we don't wait for it
1941              * in orted_cmd
1942              */
1943             ORTE_FLAG_UNSET(cd->child, ORTE_PROC_FLAG_ALIVE);
1944             cd->child->pid = 0;
1945 
1946             /* mark the child as "killed" */
1947             cd->child->state = ORTE_PROC_STATE_KILLED_BY_CMD;  /* we ordered it to die */
1948 
1949             /* ensure the child's session directory is cleaned up */
1950             orte_session_dir_finalize(&cd->child->name);
1951             /* check for everything complete - this will remove
1952              * the child object from our local list
1953              */
1954             if (ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
1955                 ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_WAITPID)) {
1956                 ORTE_ACTIVATE_PROC_STATE(&cd->child->name, cd->child->state);
1957             }
1958         }
1959     }
1960     OPAL_LIST_DESTRUCT(&procs_killed);
1961 
1962     /* cleanup arrays, if required */
1963     if (do_cleanup) {
1964         OBJ_DESTRUCT(&procarray);
1965         OBJ_DESTRUCT(&proctmp);
1966     }
1967 
1968     return ORTE_SUCCESS;
1969 }
1970 
1971 int orte_odls_base_get_proc_stats(opal_buffer_t *answer,
1972                                   orte_process_name_t *proc)
1973 {
1974     int rc;
1975     orte_proc_t *child;
1976     opal_pstats_t stats, *statsptr;
1977     int i, j;
1978 
1979     OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
1980                          "%s odls:get_proc_stats for proc %s",
1981                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1982                          ORTE_NAME_PRINT(proc)));
1983 
1984     /* find this child */
1985     for (i=0; i < orte_local_children->size; i++) {
1986         if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
1987             continue;
1988         }
1989 
1990         if (proc->jobid == child->name.jobid &&
1991             (proc->vpid == child->name.vpid ||
1992              ORTE_VPID_WILDCARD == proc->vpid)) { /* found it */
1993 
1994             OBJ_CONSTRUCT(&stats, opal_pstats_t);
1995             /* record node up to first '.' */
1996             for (j=0; j < (int)strlen(orte_process_info.nodename) &&
1997                  j < OPAL_PSTAT_MAX_STRING_LEN-1 &&
1998                  orte_process_info.nodename[j] != '.'; j++) {
1999                 stats.node[j] = orte_process_info.nodename[j];
2000             }
2001             /* record rank */
2002             stats.rank = child->name.vpid;
2003             /* get stats */
2004             rc = opal_pstat.query(child->pid, &stats, NULL);
2005             if (ORTE_SUCCESS != rc) {
2006                 OBJ_DESTRUCT(&stats);
2007                 return rc;
2008             }
2009             if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, proc, 1, ORTE_NAME))) {
2010                 ORTE_ERROR_LOG(rc);
2011                 OBJ_DESTRUCT(&stats);
2012                 return rc;
2013             }
2014             statsptr = &stats;
2015             if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &statsptr, 1, OPAL_PSTAT))) {
2016                 ORTE_ERROR_LOG(rc);
2017                 OBJ_DESTRUCT(&stats);
2018                 return rc;
2019             }
2020             OBJ_DESTRUCT(&stats);
2021         }
2022     }
2023 
2024     return ORTE_SUCCESS;
2025 }
2026 
2027 int orte_odls_base_default_restart_proc(orte_proc_t *child,
2028                                         orte_odls_base_fork_local_proc_fn_t fork_local)
2029 {
2030     int rc;
2031     orte_app_context_t *app;
2032     orte_job_t *jobdat;
2033     char basedir[MAXPATHLEN];
2034     char *wdir = NULL;
2035     orte_odls_spawn_caddy_t *cd;
2036     opal_event_base_t *evb;
2037 
2038     OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
2039                          "%s odls:restart_proc for proc %s",
2040                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2041                          ORTE_NAME_PRINT(&child->name)));
2042 
2043     /* establish our baseline working directory - we will be potentially
2044      * bouncing around as we execute this app, but we will always return
2045      * to this place as our default directory
2046      */
2047     if (NULL == getcwd(basedir, sizeof(basedir))) {
2048         return ORTE_ERR_OUT_OF_RESOURCE;
2049     }
2050 
2051     /* find this child's jobdat */
2052     if (NULL == (jobdat = orte_get_job_data_object(child->name.jobid))) {
2053         /* not found */
2054         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
2055         return ORTE_ERR_NOT_FOUND;
2056     }
2057 
2058     child->state = ORTE_PROC_STATE_FAILED_TO_START;
2059     child->exit_code = 0;
2060     ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_WAITPID);
2061     ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_IOF_COMPLETE);
2062     child->pid = 0;
2063     if (NULL != child->rml_uri) {
2064         free(child->rml_uri);
2065         child->rml_uri = NULL;
2066     }
2067     app = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, child->app_idx);
2068 
2069     /* reset envars to match this child */
2070     if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app, &app->env))) {
2071         ORTE_ERROR_LOG(rc);
2072         goto CLEANUP;
2073     }
2074 
2075     /* setup the path */
2076     if (ORTE_SUCCESS != (rc = setup_path(app, &wdir))) {
2077         ORTE_ERROR_LOG(rc);
2078         if (NULL != wdir) {
2079             free(wdir);
2080         }
2081         goto CLEANUP;
2082     }
2083 
2084     /* dispatch this child to the next available launch thread */
2085     cd = OBJ_NEW(orte_odls_spawn_caddy_t);
2086     if (NULL != wdir) {
2087         cd->wdir = strdup(wdir);
2088         free(wdir);
2089     }
2090     cd->jdata = jobdat;
2091     cd->app = app;
2092     cd->child = child;
2093     cd->fork_local = fork_local;
2094     /* setup any IOF */
2095     cd->opts.usepty = OPAL_ENABLE_PTY_SUPPORT;
2096 
2097     /* do we want to setup stdin? */
2098     if (jobdat->stdin_target == ORTE_VPID_WILDCARD ||
2099          child->name.vpid == jobdat->stdin_target) {
2100         cd->opts.connect_stdin = true;
2101     } else {
2102         cd->opts.connect_stdin = false;
2103     }
2104     if (ORTE_SUCCESS != (rc = orte_iof_base_setup_prefork(&cd->opts))) {
2105         ORTE_ERROR_LOG(rc);
2106         child->exit_code = rc;
2107         OBJ_RELEASE(cd);
2108         ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
2109         goto CLEANUP;
2110     }
2111     if (ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) {
2112         /* connect endpoints IOF */
2113         rc = orte_iof_base_setup_parent(&child->name, &cd->opts);
2114         if (ORTE_SUCCESS != rc) {
2115             ORTE_ERROR_LOG(rc);
2116             OBJ_RELEASE(cd);
2117             ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
2118             goto CLEANUP;
2119         }
2120     }
2121     ++orte_odls_globals.next_base;
2122     if (orte_odls_globals.num_threads <= orte_odls_globals.next_base) {
2123         orte_odls_globals.next_base = 0;
2124     }
2125     evb = orte_odls_globals.ev_bases[orte_odls_globals.next_base];
2126     orte_wait_cb(child, orte_odls_base_default_wait_local_proc, evb, NULL);
2127 
2128     OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
2129                          "%s restarting app %s",
2130                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->app));
2131 
2132     opal_event_set(evb, &cd->ev, -1,
2133                    OPAL_EV_WRITE, orte_odls_base_spawn_proc, cd);
2134     opal_event_set_priority(&cd->ev, ORTE_MSG_PRI);
2135     opal_event_active(&cd->ev, OPAL_EV_WRITE, 1);
2136 
2137   CLEANUP:
2138     OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
2139                          "%s odls:restart of proc %s %s",
2140                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2141                          ORTE_NAME_PRINT(&child->name),
2142                          (ORTE_SUCCESS == rc) ? "succeeded" : "failed"));
2143 
2144     /* reset our working directory back to our default location - if we
2145      * don't do this, then we will be looking for relative paths starting
2146      * from the last wdir option specified by the user. Thus, we would
2147      * be requiring that the user keep track on the cmd line of where
2148      * each app was located relative to the prior app, instead of relative
2149      * to their current location
2150      */
2151     if (0 != chdir(basedir)) {
2152         ORTE_ERROR_LOG(ORTE_ERROR);
2153     }
2154 
2155     return rc;
2156 }

/* [<][>][^][v][top][bottom][index][help] */