root/orte/mca/plm/rsh/plm_rsh_module.c

DEFINITIONS

This source file includes the following definitions.
  1. caddy_const
  2. caddy_dest
  3. rsh_init
  4. rsh_wait_daemon
  5. setup_launch
  6. ssh_child
  7. remote_spawn
  8. rsh_launch
  9. process_launch_list
  10. launch_daemons
  11. rsh_terminate_orteds
  12. rsh_finalize
  13. set_handler_default
  14. find_shell
  15. launch_agent_setup
  16. rsh_probe
  17. setup_shell

/*
 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2007 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2007 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2007-2012 Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2008-2009 Sun Microsystems, Inc.  All rights reserved.
 * Copyright (c) 2011-2017 IBM Corporation.  All rights reserved.
 * Copyright (c) 2014-2019 Intel, Inc.  All rights reserved.
 * Copyright (c) 2015-2018 Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 * These symbols are in a file by themselves to provide nice linker
 * semantics.  Since linkers generally pull in symbols by object
 * files, keeping these symbols as the only symbols in this file
 * prevents utility programs such as "ompi_info" from having to import
 * entire components just to query their version and parameters.
 */

#include "orte_config.h"
#include "orte/constants.h"

#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <errno.h>
#include <string.h>
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <time.h>
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif
#include <fcntl.h>
#include <signal.h>
#ifdef HAVE_PWD_H
#include <pwd.h>
#endif

#include "opal/mca/installdirs/installdirs.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/event/event.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/util/basename.h"
#include "opal/util/path.h"
#include "opal/class/opal_pointer_array.h"

#include "orte/util/show_help.h"
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "orte/util/proc_info.h"
#include "orte/util/threads.h"

#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/routed/routed.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/state/state.h"

#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/base.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/rsh/plm_rsh.h"

static int rsh_init(void);
static int rsh_launch(orte_job_t *jdata);
static int remote_spawn(void);
static int rsh_terminate_orteds(void);
static int rsh_finalize(void);

orte_plm_base_module_t orte_plm_rsh_module = {
    rsh_init,
    orte_plm_base_set_hnp_name,
    rsh_launch,
    remote_spawn,
    orte_plm_base_orted_terminate_job,
    rsh_terminate_orteds,
    orte_plm_base_orted_kill_local_procs,
    orte_plm_base_orted_signal_local_procs,
    rsh_finalize
};

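/* Tracker for a single daemon launch: it carries the fully-assembled
 * rsh/ssh command line plus the daemon it will start, so that launches
 * can be queued on launch_list and metered out a few at a time */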
typedef struct {
    opal_list_item_t super;
    int argc;
    char **argv;
    orte_proc_t *daemon;
} orte_plm_rsh_caddy_t;
static void caddy_const(orte_plm_rsh_caddy_t *ptr)
{
    ptr->argv = NULL;
    ptr->daemon = NULL;
}
static void caddy_dest(orte_plm_rsh_caddy_t *ptr)
{
    if (NULL != ptr->argv) {
        opal_argv_free(ptr->argv);
    }
    if (NULL != ptr->daemon) {
        OBJ_RELEASE(ptr->daemon);
    }
}
OBJ_CLASS_INSTANCE(orte_plm_rsh_caddy_t,
                   opal_list_item_t,
                   caddy_const, caddy_dest);

typedef enum {
    ORTE_PLM_RSH_SHELL_BASH = 0,
    ORTE_PLM_RSH_SHELL_ZSH,
    ORTE_PLM_RSH_SHELL_TCSH,
    ORTE_PLM_RSH_SHELL_CSH,
    ORTE_PLM_RSH_SHELL_KSH,
    ORTE_PLM_RSH_SHELL_SH,
    ORTE_PLM_RSH_SHELL_UNKNOWN
} orte_plm_rsh_shell_t;

/* These strings *must* follow the same order as the enum ORTE_PLM_RSH_SHELL_* */
static const char *orte_plm_rsh_shell_name[7] = {
    "bash",
    "zsh",
    "tcsh",       /* tcsh must come before csh, otherwise strstr finds csh first */
    "csh",
    "ksh",
    "sh",
    "unknown"
};

/*
 * Local functions
 */
static void set_handler_default(int sig);
static orte_plm_rsh_shell_t find_shell(char *shell);
static int launch_agent_setup(const char *agent, char *path);
static void ssh_child(int argc, char **argv) __opal_attribute_noreturn__;
static int rsh_probe(char *nodename,
                     orte_plm_rsh_shell_t *shell);
static int setup_shell(orte_plm_rsh_shell_t *rshell,
                       orte_plm_rsh_shell_t *lshell,
                       char *nodename, int *argc, char ***argv);
static void launch_daemons(int fd, short args, void *cbdata);
static void process_launch_list(int fd, short args, void *cbdata);

/* local global storage */
static int num_in_progress=0;
static opal_list_t launch_list;
static opal_event_t launch_event;
static char *rsh_agent_path=NULL;
static char **rsh_agent_argv=NULL;
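/* num_in_progress tracks how many rsh/ssh sessions are currently running;
 * launch_list holds the caddies for daemons still waiting to be launched,
 * and launch_event is manually activated whenever there is room for more
 * sessions (see process_launch_list and rsh_wait_daemon) */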

/**
 * Init the module
 */
static int rsh_init(void)
{
    char *tmp;
    int rc;

    /* we were selected, so setup the launch agent */
    if (mca_plm_rsh_component.using_qrsh) {
        /* perform base setup for qrsh */
        opal_asprintf(&tmp, "%s/bin/%s", getenv("SGE_ROOT"), getenv("ARC"));
        if (ORTE_SUCCESS != (rc = launch_agent_setup("qrsh", tmp))) {
            ORTE_ERROR_LOG(rc);
            free(tmp);
            return rc;
        }
        free(tmp);
        /* automatically add -inherit and grid engine PE related flags */
        opal_argv_append_nosize(&rsh_agent_argv, "-inherit");
        /* Don't use the "-noshell" flag as qrsh would have a problem
         * swallowing a long command */
        opal_argv_append_nosize(&rsh_agent_argv, "-nostdin");
        opal_argv_append_nosize(&rsh_agent_argv, "-V");
        if (0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
            opal_argv_append_nosize(&rsh_agent_argv, "-verbose");
            tmp = opal_argv_join(rsh_agent_argv, ' ');
            opal_output_verbose(1, orte_plm_base_framework.framework_output,
                                "%s plm:rsh: using \"%s\" for launching\n",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
            free(tmp);
        }
    } else if (mca_plm_rsh_component.using_llspawn) {
        /* perform base setup for llspawn */
        if (ORTE_SUCCESS != (rc = launch_agent_setup("llspawn", NULL))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        opal_output_verbose(1, orte_plm_base_framework.framework_output,
                            "%s plm:rsh: using \"%s\" for launching\n",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            rsh_agent_path);
    } else {
        /* not using qrsh or llspawn - use MCA-specified agent */
        if (ORTE_SUCCESS != (rc = launch_agent_setup(mca_plm_rsh_component.agent, NULL))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    /* point to our launch command */
    if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_LAUNCH_DAEMONS,
                                                       launch_daemons, ORTE_SYS_PRI))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* setup the event for metering the launch */
    OBJ_CONSTRUCT(&launch_list, opal_list_t);
    opal_event_set(orte_event_base, &launch_event, -1, 0, process_launch_list, NULL);
    opal_event_set_priority(&launch_event, ORTE_SYS_PRI);
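    /* note: the event is created with an fd of -1 and no persistent flags,
     * so it never fires on its own - it is explicitly triggered via
     * opal_event_active() whenever more of the launch list can be processed */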

    /* start the recvs */
    if (ORTE_SUCCESS != (rc = orte_plm_base_comm_start())) {
        ORTE_ERROR_LOG(rc);
    }

    /* we assign daemon nodes at launch */
    orte_plm_globals.daemon_nodes_assigned_at_launch = true;

    return rc;
}

/**
 * Callback on daemon exit.
 */
static void rsh_wait_daemon(int sd, short flags, void *cbdata)
{
    orte_job_t *jdata;
    orte_wait_tracker_t *t2 = (orte_wait_tracker_t*)cbdata;
    orte_plm_rsh_caddy_t *caddy = (orte_plm_rsh_caddy_t*)t2->cbdata;
    orte_proc_t *daemon = caddy->daemon;

    if (orte_orteds_term_ordered || orte_abnormal_term_ordered) {
        /* ignore any such report - it will occur if we left the
         * session attached, e.g., while debugging
         */
        OBJ_RELEASE(caddy);
        OBJ_RELEASE(t2);
        return;
    }

    if (!WIFEXITED(daemon->exit_code) ||
        WEXITSTATUS(daemon->exit_code) != 0) { /* if abnormal exit */
        /* if we are not the HNP, send a message to the HNP alerting it
         * to the failure
         */
        if (!ORTE_PROC_IS_HNP) {
            opal_buffer_t *buf;
            OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                                 "%s daemon %d failed with status %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (int)daemon->name.vpid, WEXITSTATUS(daemon->exit_code)));
            buf = OBJ_NEW(opal_buffer_t);
            opal_dss.pack(buf, &(daemon->name.vpid), 1, ORTE_VPID);
            opal_dss.pack(buf, &daemon->exit_code, 1, OPAL_INT);
            orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
                                    ORTE_RML_TAG_REPORT_REMOTE_LAUNCH,
                                    orte_rml_send_callback, NULL);
            /* note that this daemon failed */
            daemon->state = ORTE_PROC_STATE_FAILED_TO_START;
        } else {
            jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);

            OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                                 "%s daemon %d failed with status %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (int)daemon->name.vpid, WEXITSTATUS(daemon->exit_code)));
            /* set the exit status */
            ORTE_UPDATE_EXIT_STATUS(WEXITSTATUS(daemon->exit_code));
            /* note that this daemon failed */
            daemon->state = ORTE_PROC_STATE_FAILED_TO_START;
            /* increment the #daemons terminated so we will exit properly */
            jdata->num_terminated++;
            /* remove it from the routing table to ensure num_routes
             * returns the correct value
             */
            orte_routed.route_lost(&daemon->name);
            /* report that the daemon has failed so we can exit */
            ORTE_ACTIVATE_PROC_STATE(&daemon->name, ORTE_PROC_STATE_FAILED_TO_START);
        }
    }

    /* release any delay */
    --num_in_progress;
    if (num_in_progress < mca_plm_rsh_component.num_concurrent) {
        /* trigger continuation of the launch */
        opal_event_active(&launch_event, EV_WRITE, 1);
    }
    /* cleanup */
    OBJ_RELEASE(t2);
}

static int setup_launch(int *argcptr, char ***argvptr,
                        char *nodename,
                        int *node_name_index1,
                        int *proc_vpid_index, char *prefix_dir)
{
    int argc;
    char **argv;
    char *param, *value;
    orte_plm_rsh_shell_t remote_shell, local_shell;
    int orted_argc;
    char **orted_argv;
    char *orted_cmd, *orted_prefix, *final_cmd;
    int orted_index;
    int rc;
    int i, j;
    bool found;
    char *lib_base=NULL, *bin_base=NULL;
    char *opal_prefix = getenv("OPAL_PREFIX");
    char* full_orted_cmd = NULL;

    /* Figure out the basenames for the libdir and bindir.  This
       requires some explanation:

       - Use opal_install_dirs.libdir and opal_install_dirs.bindir.

       - After a discussion on the devel-core mailing list, the
       developers decided that we should use the local directory
       basenames as the basis for the prefix on the remote node.
       This does not handle a few notable cases (e.g., if the
       libdir/bindir is not simply a subdir under the prefix, if the
       libdir/bindir basename is not the same on the remote node as
       it is here on the local node, etc.), but we decided that
       --prefix was meant to handle "the common case".  If you need
       something more complex than this, a) edit your shell startup
       files to set PATH/LD_LIBRARY_PATH properly on the remote
       node, or b) use some new/to-be-defined options that
       explicitly allow setting the bindir/libdir on the remote
       node.  We decided to implement these options (e.g.,
       --remote-bindir and --remote-libdir) to orterun when it
       actually becomes a problem for someone (vs. a hypothetical
       situation).

       Hence, for now, we simply take the basename of this install's
       libdir and bindir and use it to append this install's prefix
       and use that on the remote node.
    */

    /*
     * Build argv array
     */
    argv = opal_argv_copy(rsh_agent_argv);
    argc = opal_argv_count(argv);
    /* if any ssh args were provided, now is the time to add them */
    if (NULL != mca_plm_rsh_component.ssh_args) {
        char **ssh_argv;
        ssh_argv = opal_argv_split(mca_plm_rsh_component.ssh_args, ' ');
        for (i=0; NULL != ssh_argv[i]; i++) {
            opal_argv_append(&argc, &argv, ssh_argv[i]);
        }
        opal_argv_free(ssh_argv);
    }
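    /* reserve a spot for the target hostname - the "<template>" placeholder
     * recorded at node_name_index1 is overwritten with the actual node name
     * (or user@node) for each daemon in launch_daemons/remote_spawn */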
    *node_name_index1 = argc;
    opal_argv_append(&argc, &argv, "<template>");

    /* setup the correct shell info */
    if (ORTE_SUCCESS != (rc = setup_shell(&remote_shell, &local_shell,
                                          nodename, &argc, &argv))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* now get the orted cmd - as specified by user - into our tmp array.
     * The function returns the location where the actual orted command is
     * located - usually in the final spot, but someone could
     * have added options. For example, it should be legal for them to use
     * "orted --debug-devel" so they get debug output from the orteds, but
     * not from mpirun. Also, they may have a customized version of orted
     * that takes arguments in addition to the std ones we already support
     */
    orted_argc = 0;
    orted_argv = NULL;
    orted_index = orte_plm_base_setup_orted_cmd(&orted_argc, &orted_argv);

    /* look at the returned orted cmd argv to check several cases:
     *
     * - only "orted" was given. This is the default and thus most common
     *   case. In this situation, there is nothing we need to do
     *
     * - something was given that doesn't include "orted" - i.e., someone
     *   has substituted their own daemon. There isn't anything we can
     *   do here, so we want to avoid adding prefixes to the cmd
     *
     * - something was given that precedes "orted". For example, someone
     *   may have specified "valgrind [options] orted". In this case, we
     *   need to separate out that "orted_prefix" section so it can be
     *   treated separately below
     *
     * - something was given that follows "orted". An example was given above.
     *   In this case, we need to construct the effective "orted_cmd" so it
     *   can be treated properly below
     *
     * Obviously, the latter two cases can be combined - just to make it
     * even more interesting! Gotta love rsh/ssh...
     */
    if (0 == orted_index) {
        /* single word cmd - this is the default scenario, but there could
         * be options specified so we need to account for that possibility.
         * However, we don't need/want a prefix as nothing precedes the orted
         * cmd itself
         */
        orted_cmd = opal_argv_join(orted_argv, ' ');
        orted_prefix = NULL;
    } else {
        /* okay, so the "orted" cmd is somewhere in this array, with
         * something preceding it and perhaps things following it.
         */
        orted_prefix = opal_argv_join_range(orted_argv, 0, orted_index, ' ');
        orted_cmd = opal_argv_join_range(orted_argv, orted_index, opal_argv_count(orted_argv), ' ');
    }
    opal_argv_free(orted_argv);  /* done with this */

    /* if the user specified a library path to pass, set it up now */
    param = opal_basename(opal_install_dirs.libdir);
    if (NULL != mca_plm_rsh_component.pass_libpath) {
        if (NULL != prefix_dir) {
            opal_asprintf(&lib_base, "%s:%s/%s", mca_plm_rsh_component.pass_libpath, prefix_dir, param);
        } else {
            opal_asprintf(&lib_base, "%s:%s", mca_plm_rsh_component.pass_libpath, param);
        }
    } else if (NULL != prefix_dir) {
        opal_asprintf(&lib_base, "%s/%s", prefix_dir, param);
    }
    free(param);

    /* we now need to assemble the actual cmd that will be executed - this depends
     * upon whether or not a prefix directory is being used
     */
    if (NULL != prefix_dir) {
        /* if we have a prefix directory, we need to set the PATH and
         * LD_LIBRARY_PATH on the remote node, and prepend just the orted_cmd
         * with the prefix directory
         */

        value = opal_basename(opal_install_dirs.bindir);
        opal_asprintf(&bin_base, "%s/%s", prefix_dir, value);
        free(value);

        if (NULL != orted_cmd) {
            if (0 == strcmp(orted_cmd, "orted")) {
                /* if the cmd is our standard one, then add the prefix */
                opal_asprintf(&full_orted_cmd, "%s/%s", bin_base, orted_cmd);
            } else {
                /* someone specified something different, so don't prefix it */
                full_orted_cmd = strdup(orted_cmd);
            }
            free(orted_cmd);
        }
    } else {
        full_orted_cmd = orted_cmd;
    }

    if (NULL != lib_base || NULL != bin_base) {
        if (ORTE_PLM_RSH_SHELL_SH == remote_shell ||
            ORTE_PLM_RSH_SHELL_KSH == remote_shell ||
            ORTE_PLM_RSH_SHELL_ZSH == remote_shell ||
            ORTE_PLM_RSH_SHELL_BASH == remote_shell) {
            /* if there is nothing preceding orted, then we can just
             * assemble the cmd with the orted_cmd at the end. Otherwise,
             * we have to insert the orted_prefix in the right place
             */
            opal_asprintf (&final_cmd,
                            "%s%s%s PATH=%s%s$PATH ; export PATH ; "
                            "LD_LIBRARY_PATH=%s%s$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; "
                            "DYLD_LIBRARY_PATH=%s%s$DYLD_LIBRARY_PATH ; export DYLD_LIBRARY_PATH ; "
                            "%s %s",
                            (opal_prefix != NULL ? "OPAL_PREFIX=" : " "),
                            (opal_prefix != NULL ? opal_prefix : " "),
                            (opal_prefix != NULL ? " ; export OPAL_PREFIX;" : " "),
                            (NULL != bin_base ? bin_base : " "),
                            (NULL != bin_base ? ":" : " "),
                            (NULL != lib_base ? lib_base : " "),
                            (NULL != lib_base ? ":" : " "),
                            (NULL != lib_base ? lib_base : " "),
                            (NULL != lib_base ? ":" : " "),
                            (orted_prefix != NULL ? orted_prefix : " "),
                            (full_orted_cmd != NULL ? full_orted_cmd : " "));
        } else if (ORTE_PLM_RSH_SHELL_TCSH == remote_shell ||
                   ORTE_PLM_RSH_SHELL_CSH == remote_shell) {
            /* [t]csh is a bit more challenging -- we
               have to check whether LD_LIBRARY_PATH
               is already set before we try to set it.
               Must be very careful about obeying
               [t]csh's order of evaluation and not
               using a variable before it is defined.
               See this thread for more details:
               http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */
            /* if there is nothing preceding orted, then we can just
             * assemble the cmd with the orted_cmd at the end. Otherwise,
             * we have to insert the orted_prefix in the right place
             */
            opal_asprintf (&final_cmd,
                            "%s%s%s set path = ( %s $path ) ; "
                            "if ( $?LD_LIBRARY_PATH == 1 ) "
                            "set OMPI_have_llp ; "
                            "if ( $?LD_LIBRARY_PATH == 0 ) "
                            "setenv LD_LIBRARY_PATH %s ; "
                            "if ( $?OMPI_have_llp == 1 ) "
                            "setenv LD_LIBRARY_PATH %s%s$LD_LIBRARY_PATH ; "
                            "if ( $?DYLD_LIBRARY_PATH == 1 ) "
                            "set OMPI_have_dllp ; "
                            "if ( $?DYLD_LIBRARY_PATH == 0 ) "
                            "setenv DYLD_LIBRARY_PATH %s ; "
                            "if ( $?OMPI_have_dllp == 1 ) "
                            "setenv DYLD_LIBRARY_PATH %s%s$DYLD_LIBRARY_PATH ; "
                            "%s %s",
                            (opal_prefix != NULL ? "setenv OPAL_PREFIX " : " "),
                            (opal_prefix != NULL ? opal_prefix : " "),
                            (opal_prefix != NULL ? " ;" : " "),
                            (NULL != bin_base ? bin_base : " "),
                            (NULL != lib_base ? lib_base : " "),
                            (NULL != lib_base ? lib_base : " "),
                            (NULL != lib_base ? ":" : " "),
                            (NULL != lib_base ? lib_base : " "),
                            (NULL != lib_base ? lib_base : " "),
                            (NULL != lib_base ? ":" : " "),
                            (orted_prefix != NULL ? orted_prefix : " "),
                            (full_orted_cmd != NULL ? full_orted_cmd : " "));
        } else {
            orte_show_help("help-plm-rsh.txt", "cannot-resolve-shell-with-prefix", true,
                           (NULL == opal_prefix) ? "NULL" : opal_prefix,
                           prefix_dir);
            if (NULL != bin_base) {
                free(bin_base);
            }
            if (NULL != lib_base) {
                free(lib_base);
            }
            if (NULL != orted_prefix) free(orted_prefix);
            if (NULL != full_orted_cmd) free(full_orted_cmd);
            return ORTE_ERR_SILENT;
        }
        if (NULL != bin_base) {
            free(bin_base);
        }
        if (NULL != lib_base) {
            free(lib_base);
        }
        if (NULL != full_orted_cmd) {
            free(full_orted_cmd);
        }
    } else {
        /* no prefix directory, so just aggregate the result */
        opal_asprintf(&final_cmd, "%s %s",
                       (orted_prefix != NULL ? orted_prefix : ""),
                       (full_orted_cmd != NULL ? full_orted_cmd : ""));
        if (NULL != full_orted_cmd) {
            free(full_orted_cmd);
        }
    }
    /* now add the final cmd to the argv array */
    opal_argv_append(&argc, &argv, final_cmd);
    free(final_cmd);  /* done with this */
    if (NULL != orted_prefix) free(orted_prefix);

    /* if we are not tree launching or debugging, tell the daemon
     * to daemonize so we can launch the next group
     */
    if (mca_plm_rsh_component.no_tree_spawn &&
        !orte_debug_flag &&
        !orte_debug_daemons_flag &&
        !orte_debug_daemons_file_flag &&
        !orte_leave_session_attached &&
        /* Daemonize when not using qrsh.  Or, if using qrsh, only
         * daemonize if told to by user with daemonize_qrsh flag. */
        ((!mca_plm_rsh_component.using_qrsh) ||
         (mca_plm_rsh_component.using_qrsh && mca_plm_rsh_component.daemonize_qrsh)) &&
        ((!mca_plm_rsh_component.using_llspawn) ||
         (mca_plm_rsh_component.using_llspawn && mca_plm_rsh_component.daemonize_llspawn))) {
        opal_argv_append(&argc, &argv, "--daemonize");
    }

    /*
     * Add the basic arguments to the orted command line, including
     * all debug options
     */
    orte_plm_base_orted_append_basic_args(&argc, &argv,
                                          "env",
                                          proc_vpid_index);

    /* ensure that only the ssh plm is selected on the remote daemon */
    opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
    opal_argv_append(&argc, &argv, "plm");
    opal_argv_append(&argc, &argv, "rsh");

    /* if we are tree-spawning, tell our child daemons the
     * uri of their parent (me) */
    if (!mca_plm_rsh_component.no_tree_spawn) {
        opal_argv_append(&argc, &argv, "--tree-spawn");
        orte_oob_base_get_addr(&param);
        opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
        opal_argv_append(&argc, &argv, "orte_parent_uri");
        opal_argv_append(&argc, &argv, param);
        free(param);
    }

    /* unless told otherwise... */
    if (mca_plm_rsh_component.pass_environ_mca_params) {
        /* now check our local environment for MCA params - add them
         * only if they aren't already present
         */
        for (i = 0; NULL != environ[i]; ++i) {
            if (0 == strncmp(OPAL_MCA_PREFIX"mca_base_env_list", environ[i],
                             strlen(OPAL_MCA_PREFIX"mca_base_env_list"))) {
                /* ignore this one */
                continue;
            }
            if (0 == strncmp(OPAL_MCA_PREFIX, environ[i], 9)) {
                /* check for duplicate in app->env - this
                 * would have been placed there by the
                 * cmd line processor. By convention, we
                 * always let the cmd line override the
                 * environment
                 */
                param = strdup(&environ[i][9]);
                value = strchr(param, '=');
                *value = '\0';
                value++;
                found = false;
                /* see if this param exists on the cmd line */
                for (j=0; NULL != argv[j]; j++) {
                    if (0 == strcmp(param, argv[j])) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    /* add it */
                    opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
                    opal_argv_append(&argc, &argv, param);
                    opal_argv_append(&argc, &argv, value);
                }
                free(param);
            }
        }
    }

    /* protect the params */
    mca_base_cmd_line_wrap_args(argv);

    value = opal_argv_join(argv, ' ');
    if (sysconf(_SC_ARG_MAX) < (int)strlen(value)) {
        orte_show_help("help-plm-rsh.txt", "cmd-line-too-long",
                       true, strlen(value), sysconf(_SC_ARG_MAX));
        free(value);
        return ORTE_ERR_SILENT;
    }
    free(value);

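    /* for sh/ksh remote shells, setup_shell (later in this file, not shown
     * in this excerpt) wraps the command in an opening "(" subshell so that
     * ./.profile can be sourced first - close that subshell here */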
    if (ORTE_PLM_RSH_SHELL_SH == remote_shell ||
        ORTE_PLM_RSH_SHELL_KSH == remote_shell) {
        opal_argv_append(&argc, &argv, ")");
    }

    if (0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
        param = opal_argv_join(argv, ' ');
        opal_output(orte_plm_base_framework.framework_output,
                    "%s plm:rsh: final template argv:\n\t%s",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    (NULL == param) ? "NULL" : param);
        if (NULL != param) free(param);
    }

    /* all done */
    *argcptr = argc;
    *argvptr = argv;
    return ORTE_SUCCESS;
}

/* actually ssh the child */
static void ssh_child(int argc, char **argv)
{
    char** env;
    char* var;
    long fd, fdmax = sysconf(_SC_OPEN_MAX);
    char *exec_path;
    char **exec_argv;
    int fdin;
    sigset_t sigs;

    /* setup environment */
    env = opal_argv_copy(orte_launch_environ);

    /* We don't need to sense an oversubscribed condition and set the sched_yield
     * for the node as we are only launching the daemons at this time. The daemons
     * are now smart enough to set the oversubscribed condition themselves when
     * they launch the local procs.
     */

    /* We cannot launch locally as this would cause multiple daemons to
     * exist on a node (HNP counts as a daemon). This is taken care of
     * by the earlier check for daemon_preexists, so we only have to worry
     * about remote launches here
     */
    exec_argv = argv;
    exec_path = strdup(rsh_agent_path);

    /* Don't let ssh slurp all of our stdin! */
    fdin = open("/dev/null", O_RDWR);
    dup2(fdin, 0);
    close(fdin);

    /* close all file descriptors w/ exception of stdin/stdout/stderr */
    for (fd=3; fd<fdmax; fd++)
        close(fd);

    /* Set signal handlers back to the default.  Do this close
     to the execve() because the event library may (and likely
     will) reset them.  If we don't do this, the event
     library may have left some set that, at least on some
     OS's, don't get reset via fork() or exec().  Hence, the
     orted could be unkillable (for example). */

    set_handler_default(SIGTERM);
    set_handler_default(SIGINT);
    set_handler_default(SIGHUP);
    set_handler_default(SIGPIPE);
    set_handler_default(SIGCHLD);

    /* Unblock all signals, for many of the same reasons that
     we set the default handlers, above.  This is noticeable
     on Linux where the event library blocks SIGTERM, but we
     don't want that blocked by the orted (or, more
     specifically, we don't want it to be blocked by the
     orted and then inherited by the ORTE processes that it
     forks, making them unkillable by SIGTERM). */
    sigprocmask(0, 0, &sigs);
    sigprocmask(SIG_UNBLOCK, &sigs, 0);

    /* exec the daemon */
    var = opal_argv_join(argv, ' ');
    OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                         "%s plm:rsh: executing: (%s) [%s]",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         exec_path, (NULL == var) ? "NULL" : var));
    if (NULL != var) free(var);

    execve(exec_path, exec_argv, env);
    opal_output(0, "plm:rsh: execve of %s failed with errno=%s(%d)\n",
                exec_path, strerror(errno), errno);
    exit(-1);
}

/*
 * launch a set of daemons from a remote daemon
 */
static int remote_spawn(void)
{
    int node_name_index1;
    int proc_vpid_index;
    char **argv = NULL;
    char *prefix, *hostname, *var;
    int argc;
    int rc = ORTE_SUCCESS;
    bool failed_launch = true;
    orte_process_name_t target;
    orte_plm_rsh_caddy_t *caddy;
    orte_job_t *daemons;
    opal_list_t coll;
    orte_namelist_t *child;

    OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                         "%s plm:rsh: remote spawn called",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* if we hit any errors, tell the HNP it was us */
    target.vpid = ORTE_PROC_MY_NAME->vpid;

    /* check to see if enable-orterun-prefix-by-default was given - if
     * this is being done by a singleton, then orterun will not be there
     * to put the prefix in the app. So make sure we check to find it */
    if ((bool)ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT) {
        prefix = strdup(opal_install_dirs.prefix);
    } else {
        prefix = NULL;
    }

    /* get the updated routing list */
    OBJ_CONSTRUCT(&coll, opal_list_t);
    orte_routed.get_routing_list(&coll);

    /* if I have no children, just return */
    if (0 == opal_list_get_size(&coll)) {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                             "%s plm:rsh: remote spawn - have no children!",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        failed_launch = false;
        rc = ORTE_SUCCESS;
        OBJ_DESTRUCT(&coll);
        goto cleanup;
    }

    /* setup the launch */
    if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv,
                                           orte_process_info.nodename, &node_name_index1,
                                           &proc_vpid_index, prefix))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&coll);
        goto cleanup;
    }

    /* get the daemon job object */
    if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        rc = ORTE_ERR_NOT_FOUND;
        OBJ_DESTRUCT(&coll);
        goto cleanup;
    }

    target.jobid = ORTE_PROC_MY_NAME->jobid;
    OPAL_LIST_FOREACH(child, &coll, orte_namelist_t) {
        target.vpid = child->name.vpid;

        /* get the host where this daemon resides */
        if (NULL == (hostname = orte_get_proc_hostname(&target))) {
            opal_output(0, "%s unable to get hostname for daemon %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_VPID_PRINT(child->name.vpid));
            rc = ORTE_ERR_NOT_FOUND;
            OBJ_DESTRUCT(&coll);
            goto cleanup;
        }

        free(argv[node_name_index1]);
        argv[node_name_index1] = strdup(hostname);

        /* pass the vpid */
        rc = orte_util_convert_vpid_to_string(&var, target.vpid);
        if (ORTE_SUCCESS != rc) {
            opal_output(0, "orte_plm_rsh: unable to get daemon vpid as string");
            exit(-1);
        }
        free(argv[proc_vpid_index]);
        argv[proc_vpid_index] = strdup(var);
        free(var);

        /* we are in an event, so no need to protect the list */
        caddy = OBJ_NEW(orte_plm_rsh_caddy_t);
        caddy->argc = argc;
        caddy->argv = opal_argv_copy(argv);
        /* fake a proc structure for the new daemon - will be released
         * upon startup
         */
        caddy->daemon = OBJ_NEW(orte_proc_t);
        caddy->daemon->name.jobid = ORTE_PROC_MY_NAME->jobid;
        caddy->daemon->name.vpid = target.vpid;
        opal_list_append(&launch_list, &caddy->super);
    }
    OPAL_LIST_DESTRUCT(&coll);
    /* we NEVER use tree-spawn for secondary launches - e.g.,
     * due to a dynamic launch requesting add_hosts - so be
     * sure to turn it off here */
    mca_plm_rsh_component.no_tree_spawn = true;

    /* trigger the event to start processing the launch list */
    OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                         "%s plm:rsh: activating launch event",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    opal_event_active(&launch_event, EV_WRITE, 1);

    /* declare the launch a success */
    failed_launch = false;

cleanup:
    if (NULL != argv) {
        opal_argv_free(argv);
    }

    /* check for failed launch */
    if (failed_launch) {
        /* report cannot launch this daemon to HNP */
        opal_buffer_t *buf;
        buf = OBJ_NEW(opal_buffer_t);
        opal_dss.pack(buf, &target.vpid, 1, ORTE_VPID);
        opal_dss.pack(buf, &rc, 1, OPAL_INT);
        orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
                                ORTE_RML_TAG_REPORT_REMOTE_LAUNCH,
                                orte_rml_send_callback, NULL);
    }

    return rc;
}

/*
 * Launch a daemon (bootproxy) on each node. The daemon will be responsible
 * for launching the application.
 */

static int rsh_launch(orte_job_t *jdata)
{
    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        /* this is a restart situation - skip to the mapping stage */
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
    } else {
        /* new job - set it up */
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_INIT);
    }
    return ORTE_SUCCESS;
}

static void process_launch_list(int fd, short args, void *cbdata)
{
    opal_list_item_t *item;
    pid_t pid;
    orte_plm_rsh_caddy_t *caddy;

    ORTE_ACQUIRE_OBJECT(caddy);

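    /* start as many queued sessions as the concurrency limit allows;
     * anything left on launch_list stays queued until rsh_wait_daemon
     * re-activates launch_event as running sessions complete */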
    while (num_in_progress < mca_plm_rsh_component.num_concurrent) {
        item = opal_list_remove_first(&launch_list);
        if (NULL == item) {
            /* we are done */
            break;
        }
        caddy = (orte_plm_rsh_caddy_t*)item;
        /* register the sigchild callback */
        ORTE_FLAG_SET(caddy->daemon, ORTE_PROC_FLAG_ALIVE);
        orte_wait_cb(caddy->daemon, rsh_wait_daemon, orte_event_base, (void*)caddy);

        /* fork a child to exec the rsh/ssh session */
        pid = fork();
        if (pid < 0) {
            ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN);
            orte_wait_cb_cancel(caddy->daemon);
            continue;
        }

        /* child */
        if (pid == 0) {
            /*
             * When the user presses CTRL-C, SIGINT is sent to the whole process
             * group which terminates the rsh/ssh command. This can cause the
             * remote daemon to crash with a SIGPIPE when it tries to print out
             * status information. This has two consequences:
             * 1) The remote node is not cleaned up as it should. The local
             *    processes will notice that the orted failed and cleanup their
             *    part of the session directory, but the job level part will
             *    remain littered.
             * 2) Any debugging information we expected to see from the orted
             *    during shutdown is lost.
             *
             * The solution here is to put the child processes in a separate
             * process group from the HNP. So when the user presses CTRL-C
             * then only the HNP receives the signal, and not the rsh/ssh
             * child processes.
             */
#if HAVE_SETPGID
            if (0 != setpgid(0, 0)) {
                opal_output(0, "plm:rsh: Error: setpgid(0,0) failed in child with errno=%s(%d)\n",
                            strerror(errno), errno);
                exit(-1);
            }
#endif

            /* do the ssh launch - this will exit if it fails */
            ssh_child(caddy->argc, caddy->argv);
        } else { /* parent */
            // Put the child in a separate process group
            // - see comment in child section.
#if HAVE_SETPGID
            if (0 != setpgid(pid, pid)) {
                opal_output(0, "plm:rsh: Warning: setpgid(%ld,%ld) failed in parent with errno=%s(%d)\n",
                            (long)pid, (long)pid, strerror(errno), errno);
                // Ignore this error since the child is off and running.
                // We still need to track it.
            }
#endif

            /* indicate this daemon has been launched */
            caddy->daemon->state = ORTE_PROC_STATE_RUNNING;
            /* record the pid of the ssh fork */
            caddy->daemon->pid = pid;

            OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                                 "%s plm:rsh: recording launch of daemon %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&(caddy->daemon->name))));
            num_in_progress++;
        }
    }
}

static void launch_daemons(int fd, short args, void *cbdata)
{
    orte_job_map_t *map = NULL;
    int node_name_index1;
    int proc_vpid_index;
    char **argv = NULL;
    char *prefix_dir = NULL, *var;
    int argc;
    int rc;
    orte_app_context_t *app;
    orte_node_t *node, *nd;
    orte_std_cntr_t nnode;
    orte_job_t *daemons;
    orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
    orte_plm_rsh_caddy_t *caddy;
    opal_list_t coll;
    char *username;
    int port, *portptr;
    orte_namelist_t *child;

    ORTE_ACQUIRE_OBJECT(state);

    /* if we are launching debugger daemons, then just go
     * do it - no new daemons will be launched
     */
    if (ORTE_FLAG_TEST(state->jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
        state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
        ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
        OBJ_RELEASE(state);
        return;
    }

    /* setup the virtual machine */
    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    if (ORTE_SUCCESS != (rc = orte_plm_base_setup_virtual_machine(state->jdata))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* if we don't want to launch, then don't attempt to
     * launch the daemons - the user really wants to just
     * look at the proposed process map
     */
    if (orte_do_not_launch) {
        /* set the state to indicate the daemons reported - this
         * will trigger the daemons_reported event and cause the
         * job to move to the following step
         */
        state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
        ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
        OBJ_RELEASE(state);
        return;
    }

    /* Get the map for this job */
    if (NULL == (map = daemons->map)) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        rc = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }

    if (0 == map->num_new_daemons) {
        /* set the state to indicate the daemons reported - this
         * will trigger the daemons_reported event and cause the
         * job to move to the following step
         */
        state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
        ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
        OBJ_RELEASE(state);
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                         "%s plm:rsh: launching vm",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    if ((0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output) ||
         orte_leave_session_attached) &&
        mca_plm_rsh_component.num_concurrent < map->num_new_daemons) {
        /**
         * If we are in '--debug-daemons' we keep the ssh connection
         * alive for the span of the run. If we use this option
         * AND we launch on more than "num_concurrent" machines
         * then we will deadlock. No connections are terminated
         * until the job is complete, no job is started
         * since all the orteds are waiting for all the others
         * to come online, and the others are not launched because
         * we are waiting on those that have started to terminate
         * their ssh tunnels. :(
         * As we cannot run in this situation, pretty print the error
         * and return an error code.
         */
        orte_show_help("help-plm-rsh.txt", "deadlock-params",
                       true, mca_plm_rsh_component.num_concurrent, map->num_new_daemons);
        ORTE_ERROR_LOG(ORTE_ERR_FATAL);
        OBJ_RELEASE(state);
        rc = ORTE_ERR_SILENT;
        goto cleanup;
    }

    /*
     * After a discussion between Ralph & Jeff, we concluded that we
     * really are handling the prefix dir option incorrectly. It currently
     * is associated with an app_context, yet it really refers to the
     * location where OpenRTE/Open MPI is installed on a NODE. Fixing
     * this right now would involve significant change to orterun as well
     * as elsewhere, so we will intentionally leave this incorrect at this
     * point. The error, however, is identical to that seen in all prior
     * releases of OpenRTE/Open MPI, so our behavior is no worse than before.
     *
     * A note to fix this, along with ideas on how to do so, has been filed
     * on the project's Trac system under "feature enhancement".
     *
     * For now, default to the prefix_dir provided in the first app_context.
     * Since there always MUST be at least one app_context, we are safe in
     * doing this.
     */
    app = (orte_app_context_t*)opal_pointer_array_get_item(state->jdata->apps, 0);
    if (!orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&prefix_dir, OPAL_STRING)) {
        /* check to see if enable-orterun-prefix-by-default was given - if
         * this is being done by a singleton, then orterun will not be there
         * to put the prefix in the app. So make sure we check to find it */
        if ((bool)ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT) {
            prefix_dir = strdup(opal_install_dirs.prefix);
        }
    }
    /* we also need at least one node name so we can check what shell is
     * being used, if we have to
     */
    node = NULL;
    for (nnode = 0; nnode < map->nodes->size; nnode++) {
        if (NULL != (nd = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) {
            node = nd;
            /* if the node is me, then we continue - we would
             * prefer to find some other node so we can tell what the remote
             * shell is, if necessary
             */
            if (0 != strcmp(node->name, orte_process_info.nodename)) {
                break;
            }
        }
    }
    if (NULL == node) {
        /* this should be impossible, but adding the check will
         * silence code checkers that don't know better */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        rc = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }

    /* if we are tree launching, find our children and create the launch cmd */
    if (!mca_plm_rsh_component.no_tree_spawn) {
        orte_job_t *jdatorted;

        /* get the orted job data object */
        if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            rc = ORTE_ERR_NOT_FOUND;
            goto cleanup;
        }

        /* get the updated routing list */
        OBJ_CONSTRUCT(&coll, opal_list_t);
        orte_routed.get_routing_list(&coll);
    }

    /* setup the launch */
    if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, node->name, &node_name_index1,
                                           &proc_vpid_index, prefix_dir))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /*
     * Iterate through each of the nodes
     */
    for (nnode=0; nnode < map->nodes->size; nnode++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) {
            continue;
        }

        /* if we are tree launching, only launch our own children */
        if (!mca_plm_rsh_component.no_tree_spawn) {
            OPAL_LIST_FOREACH(child, &coll, orte_namelist_t) {
                if (child->name.vpid == node->daemon->name.vpid) {
                    goto launch;
                }
            }
            /* didn't find it - ignore this node */
            OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                                 "%s plm:rsh:launch daemon %s not a child of mine",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_VPID_PRINT(node->daemon->name.vpid)));
            continue;
        }

    launch:
        /* if this daemon already exists, don't launch it! */
        if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) {
            OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                                 "%s plm:rsh:launch daemon already exists on node %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name));
            continue;
        }

        /* if the node's daemon has not been defined, then we
         * have an error!
         */
        if (NULL == node->daemon) {
            ORTE_ERROR_LOG(ORTE_ERR_FATAL);
            OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                                 "%s plm:rsh:launch daemon failed to be defined on node %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name));
            continue;
        }

        /* setup node name */
        free(argv[node_name_index1]);
        username = NULL;
        if (orte_get_attribute(&node->attributes, ORTE_NODE_USERNAME, (void**)&username, OPAL_STRING)) {
            opal_asprintf (&argv[node_name_index1], "%s@%s",
                            username, node->name);
            free(username);
        } else {
            argv[node_name_index1] = strdup(node->name);
        }

        /* pass the vpid */
        rc = orte_util_convert_vpid_to_string(&var, node->daemon->name.vpid);
        if (ORTE_SUCCESS != rc) {
            opal_output(0, "orte_plm_rsh: unable to get daemon vpid as string");
            exit(-1);
        }
        free(argv[proc_vpid_index]);
        argv[proc_vpid_index] = strdup(var);
        free(var);

        OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                             "%s plm:rsh: adding node %s to launch list",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             node->name));

        /* we are in an event, so no need to protect the list */
        caddy = OBJ_NEW(orte_plm_rsh_caddy_t);
        caddy->argc = argc;
        caddy->argv = opal_argv_copy(argv);
        /* insert the alternate port if any */
        portptr = &port;
        if (orte_get_attribute(&node->attributes, ORTE_NODE_PORT, (void**)&portptr, OPAL_INT)) {
            char portname[16];
            /* for the sake of simplicity, insert "-p" <port> in the duplicated argv */
            opal_argv_insert_element(&caddy->argv, node_name_index1+1, "-p");
            snprintf (portname, 15, "%d", port);
            opal_argv_insert_element(&caddy->argv, node_name_index1+2, portname);
        }
        caddy->daemon = node->daemon;
        OBJ_RETAIN(caddy->daemon);
        opal_list_append(&launch_list, &caddy->super);
    }
    /* we NEVER use tree-spawn for secondary launches - e.g.,
     * due to a dynamic launch requesting add_hosts - so be
     * sure to turn it off here */
    mca_plm_rsh_component.no_tree_spawn = true;

    /* set the job state to indicate the daemons are launched */
    state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;

    /* trigger the event to start processing the launch list */
    OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
                         "%s plm:rsh: activating launch event",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    ORTE_POST_OBJECT(state);
    opal_event_active(&launch_event, EV_WRITE, 1);

    /* now that we've launched the daemons, let the daemon callback
     * function determine they are all alive and trigger the next stage
     */
    OBJ_RELEASE(state);
    opal_argv_free(argv);
    return;

 cleanup:
    OBJ_RELEASE(state);
    ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
}
1305 
1306 /**
1307  * Terminate the orteds for a given job
1308  */
1309 static int rsh_terminate_orteds(void)
1310 {
1311     int rc;
1312 
1313     if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
1314         ORTE_ERROR_LOG(rc);
1315     }
1316 
1317     return rc;
1318 }
1319 
1320 static int rsh_finalize(void)
1321 {
1322     int rc, i;
1323     orte_job_t *jdata;
1324     orte_proc_t *proc;
1325     pid_t ret;
1326 
1327     /* remove launch event */
1328     opal_event_del(&launch_event);
1329     OPAL_LIST_DESTRUCT(&launch_list);
1330 
1331     /* cleanup any pending recvs */
1332     if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) {
1333         ORTE_ERROR_LOG(rc);
1334     }
1335 
1336     if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) && orte_abnormal_term_ordered) {
1337         /* ensure that any lingering ssh's are gone */
1338         if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
1339             return rc;
1340         }
1341         for (i=0; i < jdata->procs->size; i++) {
1342             if (NULL == (proc = opal_pointer_array_get_item(jdata->procs, i))) {
1343                 continue;
1344             }
1345             if (0 < proc->pid) {
1346                 /* this is a daemon we started - see if the ssh process still exists */
1347                 ret = waitpid(proc->pid, &proc->exit_code, WNOHANG);
1348                 if (-1 == ret && ECHILD == errno) {
1349                     /* The pid no longer exists, so we'll call this "good
1350                        enough for government work" */
1351                     continue;
1352                 }
1353                 if (ret == proc->pid) {
1354                     /* already died */
1355                     continue;
1356                 }
1357                 /* ssh session must still be alive, so kill it */
1358                 kill(proc->pid, SIGKILL);
1359             }
1360         }
1361     }
1362     free(mca_plm_rsh_component.agent_path);
1363     free(rsh_agent_path);
1364     opal_argv_free(mca_plm_rsh_component.agent_argv);
1365     opal_argv_free(rsh_agent_argv);
1366 
1367     return rc;
1368 }
1369 
1370 
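     /* Reset the given signal to its default disposition.  This is done in
      * the forked child (see ssh_child) so the exec'd launch agent does not
      * inherit our signal handlers. */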
1371 static void set_handler_default(int sig)
1372 {
1373     struct sigaction act;
1374 
1375     act.sa_handler = SIG_DFL;
1376     act.sa_flags = 0;
1377     sigemptyset(&act.sa_mask);
1378 
1379     sigaction(sig, &act, (struct sigaction *)0);
1380 }
1381 
1382 
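     /* Map a shell path (e.g. "/bin/bash") to the corresponding
      * orte_plm_rsh_shell_t value by matching the path's basename against
      * the known-shell name table. */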
1383 static orte_plm_rsh_shell_t find_shell(char *shell)
1384 {
1385     int i         = 0;
1386     char *sh_name = NULL;
1387 
1388     if( (NULL == shell) || (strlen(shell) == 1) ) {
1389         /* Malformed shell */
1390         return ORTE_PLM_RSH_SHELL_UNKNOWN;
1391     }
1392 
1393     sh_name = strrchr(shell, '/');
1394     if( NULL == sh_name ) {
1395         /* Malformed shell */
1396         return ORTE_PLM_RSH_SHELL_UNKNOWN;
1397     }
1398 
1399     /* skip the '/' */
1400     ++sh_name;
1401     for (i = 0; i < (int)(sizeof (orte_plm_rsh_shell_name) /
1402                           sizeof(orte_plm_rsh_shell_name[0])); ++i) {
1403         if (NULL != strstr(sh_name, orte_plm_rsh_shell_name[i])) {
1404             return (orte_plm_rsh_shell_t)i;
1405         }
1406     }
1407 
1408     /* We didn't find it */
1409     return ORTE_PLM_RSH_SHELL_UNKNOWN;
1410 }
1411 
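     /* Locate the launch agent (e.g. ssh or rsh), filling in rsh_agent_argv
      * and rsh_agent_path.  For ssh we also reconcile X11 forwarding: add
      * "-X" when an xterm was requested, or "-x" when running non-verbose
      * and the user gave no explicit preference. */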
1412 static int launch_agent_setup(const char *agent, char *path)
1413 {
1414     char *bname;
1415     int i;
1416 
1417     /* if no agent was provided, then report not found */
1418     if (NULL == mca_plm_rsh_component.agent && NULL == agent) {
1419         return ORTE_ERR_NOT_FOUND;
1420     }
1421 
1422     /* search for the agent and build its argv */
1423     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1424                          "%s plm:rsh_setup on agent %s path %s",
1425                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1426                          (NULL == agent) ? mca_plm_rsh_component.agent : agent,
1427                          (NULL == path) ? "NULL" : path));
1428     rsh_agent_argv = orte_plm_rsh_search(agent, path);
1429 
1430     if (0 == opal_argv_count(rsh_agent_argv)) {
1431         /* nothing was found */
1432         return ORTE_ERR_NOT_FOUND;
1433     }
1434 
1435     /* see if we can find the agent in the path */
1436     rsh_agent_path = opal_path_findv(rsh_agent_argv[0], X_OK, environ, path);
1437 
1438     if (NULL == rsh_agent_path) {
1439         /* not an error - just report not found */
1440         opal_argv_free(rsh_agent_argv);
1441         return ORTE_ERR_NOT_FOUND;
1442     }
1443 
1444     bname = opal_basename(rsh_agent_argv[0]);
1445     if (NULL != bname && 0 == strcmp(bname, "ssh")) {
1446         /* if xterm option was given, add '-X', ensuring we don't do it twice */
1447         if (NULL != orte_xterm) {
1448             opal_argv_append_unique_nosize(&rsh_agent_argv, "-X", false);
1449         } else if (0 >= opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
1450             /* if debug was not specified, and the user didn't explicitly
1451              * specify X11 forwarding/non-forwarding, add "-x" if it
1452              * isn't already there (check either case)
1453              */
1454             for (i = 1; NULL != rsh_agent_argv[i]; ++i) {
1455                 if (0 == strcasecmp("-x", rsh_agent_argv[i])) {
1456                     break;
1457                 }
1458             }
1459             if (NULL == rsh_agent_argv[i]) {
1460                 opal_argv_append_nosize(&rsh_agent_argv, "-x");
1461             }
1462         }
1463     }
1464     if (NULL != bname) {
1465         free(bname);
1466     }
1467 
1468     /* the caller can append any additional argv's they desire */
1469     return ORTE_SUCCESS;
1470 }
1471 
1472 /**
1473  * Check the Shell variable and system type on the specified node
1474  */
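     /* The probe forks "<agent> <nodename> echo $SHELL", reads the child's
      * stdout through a pipe, and matches the reported shell's basename
      * against the known-shell name table. */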
1475 static int rsh_probe(char *nodename,
1476                      orte_plm_rsh_shell_t *shell)
1477 {
1478     char **argv;
1479     int argc, rc = ORTE_SUCCESS, i;
1480     int fd[2];
1481     pid_t pid;
1482     char outbuf[4096];
1483 
1484     OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
1485                          "%s plm:rsh: going to check SHELL variable on node %s",
1486                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1487                          nodename));
1488 
1489     *shell = ORTE_PLM_RSH_SHELL_UNKNOWN;
1490     if (pipe(fd)) {
1491         OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
1492                              "%s plm:rsh: pipe failed with errno=%d",
1493                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1494                              errno));
1495         return ORTE_ERR_IN_ERRNO;
1496     }
1497     if ((pid = fork()) < 0) {
1498         OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
1499                              "%s plm:rsh: fork failed with errno=%d",
1500                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1501                              errno));
1502         return ORTE_ERR_IN_ERRNO;
1503     }
1504     else if (pid == 0) {          /* child */
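             /* child: point stdout at the write end of the pipe, then exec
              * the agent with "echo $SHELL" appended so the parent can read
              * the result */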
1505         if (dup2(fd[1], 1) < 0) {
1506             OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
1507                                  "%s plm:rsh: dup2 failed with errno=%d",
1508                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1509                                  errno));
1510             exit(1);
1511         }
1512         /* Build argv array */
1513         argv = opal_argv_copy(mca_plm_rsh_component.agent_argv);
1514         argc = opal_argv_count(mca_plm_rsh_component.agent_argv);
1515         opal_argv_append(&argc, &argv, nodename);
1516         opal_argv_append(&argc, &argv, "echo $SHELL");
1517 
1518         execvp(argv[0], argv);
1519         exit(errno);
1520     }
1521     if (close(fd[1])) {
1522         OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
1523                              "%s plm:rsh: close failed with errno=%d",
1524                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1525                              errno));
1526         return ORTE_ERR_IN_ERRNO;
1527     }
1528 
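     /* parent: drain the child's stdout until EOF.  If the buffer fills,
      * the final zero-byte read returns 0 and ends the loop; the captured
      * output is then NUL-terminated. */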
1529     {
1530         ssize_t ret = 1;
1531         char* ptr = outbuf;
1532         size_t outbufsize = sizeof(outbuf);
1533 
1534         do {
1535             ret = read(fd[0], ptr, outbufsize-1);
1536             if (ret < 0) {
1537                 if (errno == EINTR)
1538                     continue;
1539                 OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
1540                                      "%s plm:rsh: Unable to detect the remote shell (error %s)",
1541                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1542                                      strerror(errno)));
1543                 rc = ORTE_ERR_IN_ERRNO;
1544                 break;
1545             }
1546             if( outbufsize > 1 ) {
1547                 outbufsize -= ret;
1548                 ptr += ret;
1549             }
1550         } while( 0 != ret );
1551         *ptr = '\0';
1552     }
1553     close(fd[0]);
1554 
1555     if( outbuf[0] != '\0' ) {
1556         char *sh_name = strrchr(outbuf, '/');
1557         if( NULL != sh_name ) {
1558             sh_name++; /* skip '/' */
1559             /* Search for the substring of known shell-names */
1560             for (i = 0; i < (int)(sizeof (orte_plm_rsh_shell_name)/
1561                                   sizeof(orte_plm_rsh_shell_name[0])); i++) {
1562                 if ( NULL != strstr(sh_name, orte_plm_rsh_shell_name[i]) ) {
1563                     *shell = (orte_plm_rsh_shell_t)i;
1564                     break;
1565                 }
1566             }
1567         }
1568     }
1569 
1570     OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
1571                          "%s plm:rsh: node %s has SHELL: %s",
1572                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1573                          nodename,
1574                          (ORTE_PLM_RSH_SHELL_UNKNOWN == *shell) ? "UNHANDLED" : (char*)orte_plm_rsh_shell_name[*shell]));
1575 
1576     return rc;
1577 }
1578 
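     /* Resolve the local and remote shells (probing the remote node unless
      * assume_same_shell is set).  For sh/ksh remotes, prepend a ".profile"
      * source so the daemon starts with a login-like environment. */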
1579 static int setup_shell(orte_plm_rsh_shell_t *rshell,
1580                        orte_plm_rsh_shell_t *lshell,
1581                        char *nodename, int *argc, char ***argv)
1582 {
1583     orte_plm_rsh_shell_t remote_shell, local_shell;
1584     char *param = NULL;   /* may be reported in the warning below */
1585     int rc;
1586 
1587     /* What is our local shell? */
1588     local_shell = ORTE_PLM_RSH_SHELL_UNKNOWN;
1589 
1590 #if OPAL_ENABLE_GETPWUID
1591     {
1592         struct passwd *p;
1593 
1594         p = getpwuid(getuid());
1595         if( NULL != p ) {
1596             param = p->pw_shell;
1597             local_shell = find_shell(p->pw_shell);
1598         }
1599     }
1600 #endif
1601 
1602     /* If we didn't find it in getpwuid(), try looking at the $SHELL
1603        environment variable (see https://svn.open-mpi.org/trac/ompi/ticket/1060)
1604     */
1605     if (ORTE_PLM_RSH_SHELL_UNKNOWN == local_shell &&
1606         NULL != (param = getenv("SHELL"))) {
1607         local_shell = find_shell(param);
1608     }
1609 
1610     if (ORTE_PLM_RSH_SHELL_UNKNOWN == local_shell) {
1611         opal_output(0, "WARNING: local probe returned unhandled shell: %s; assuming bash\n",
1612                     (NULL != param) ? param : "unknown");
1613         local_shell = ORTE_PLM_RSH_SHELL_BASH;
1614     }
1615 
1616     OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
1617                          "%s plm:rsh: local shell: %d (%s)",
1618                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1619                          local_shell, orte_plm_rsh_shell_name[local_shell]));
1620 
1621     /* What is our remote shell? */
1622     if (mca_plm_rsh_component.assume_same_shell) {
1623         remote_shell = local_shell;
1624         OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
1625                              "%s plm:rsh: assuming same remote shell as local shell",
1626                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1627     } else {
1628         rc = rsh_probe(nodename, &remote_shell);
1629 
1630         if (ORTE_SUCCESS != rc) {
1631             ORTE_ERROR_LOG(rc);
1632             return rc;
1633         }
1634 
1635         if (ORTE_PLM_RSH_SHELL_UNKNOWN == remote_shell) {
1636             opal_output(0, "WARNING: rsh probe returned unhandled shell; assuming bash\n");
1637             remote_shell = ORTE_PLM_RSH_SHELL_BASH;
1638         }
1639     }
1640 
1641     OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
1642                          "%s plm:rsh: remote shell: %d (%s)",
1643                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1644                          remote_shell, orte_plm_rsh_shell_name[remote_shell]));
1645 
1646     /* Do we need to source .profile on the remote side?
1647        - sh: yes (see bash(1))
1648        - ksh: yes (see ksh(1))
1649        - bash: no (see bash(1))
1650        - [t]csh: no (see csh(1) and tcsh(1))
1651        - zsh: no (see http://zsh.sourceforge.net/FAQ/zshfaq03.html#l19)
1652     */
1653 
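     /* NOTE: the opening "(" in this prefix is presumably balanced later,
      * when the remainder of the remote command line is assembled by the
      * caller. */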
1654     if (ORTE_PLM_RSH_SHELL_SH == remote_shell ||
1655         ORTE_PLM_RSH_SHELL_KSH == remote_shell) {
1656         int i;
1657         char **tmp;
1658         tmp = opal_argv_split("( test ! -r ./.profile || . ./.profile;", ' ');
1659         if (NULL == tmp) {
1660             return ORTE_ERR_OUT_OF_RESOURCE;
1661         }
1662         for (i = 0; NULL != tmp[i]; ++i) {
1663             opal_argv_append(argc, argv, tmp[i]);
1664         }
1665         opal_argv_free(tmp);
1666     }
1667 
1668     /* pass results back */
1669     *rshell = remote_shell;
1670     *lshell = local_shell;
1671 
1672     return ORTE_SUCCESS;
1673 }
