root/orte/mca/odls/pspawn/odls_pspawn.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. odls_pspawn_kill_local
  2. orte_odls_pspawn_kill_local_procs
  3. close_open_file_descriptors
  4. odls_pspawn_fork_local_proc
  5. orte_odls_pspawn_launch_local_procs
  6. send_signal
  7. orte_odls_pspawn_signal_local_procs
  8. orte_odls_pspawn_restart_proc

   1 /*
   2  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2008 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2007-2010 Oracle and/or its affiliates.  All rights reserved.
  13  * Copyright (c) 2007      Evergrid, Inc. All rights reserved.
  14  * Copyright (c) 2008-2017 Cisco Systems, Inc.  All rights reserved
  15  * Copyright (c) 2010      IBM Corporation.  All rights reserved.
  16  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.  All rights
  17  *                         reserved.
  18  * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
  19  * Copyright (c) 2017      Rutgers, The State University of New Jersey.
  20  *                         All rights reserved.
  21  * Copyright (c) 2017      Research Organization for Information Science
  22  *                         and Technology (RIST). All rights reserved.
  23  *
  24  * $COPYRIGHT$
  25  *
  26  * Additional copyrights may follow
  27  *
  28  * $HEADER$
  29  */
  30 
  31 /*
  32  * There is a complicated sequence of events that occurs when the
  33  * parent forks a child process that is intended to launch the target
  34  * executable.
  35  *
  36  * Before the child process exec's the target executable, it might try
  37  * to set the affinity of that new child process according to a
  38  * complex series of rules.  This binding may fail in a myriad of
  39  * different ways.  A lot of this code deals with reporting that error
  40  * accurately to the end user.  This is a complex task in itself
  41  * because the child process is not "really" an ORTE process -- all
  42  * error reporting must be proxied up to the parent who can use normal
  43  * ORTE error reporting mechanisms.
  44  *
  45  * Here's a high-level description of what is occurring in this file:
  46  *
  47  * - parent opens a pipe
  48  * - parent forks a child
  49  * - parent blocks reading on the pipe: the pipe will either close
  50  *   (indicating that the child successfully exec'ed) or the child will
  51  *   write some proxied error data up the pipe
  52  *
  53  * - the child tries to set affinity and do other housekeeping in
  54  *   preparation of exec'ing the target executable
  55  * - if the child fails anywhere along the way, it sends a message up
  56  *   the pipe to the parent indicating what happened -- including a
  57  *   rendered error message detailing the problem (i.e., human-readable).
  58  * - it is important that the child renders the error message: there
  59  *   are so many errors that are possible that the child is really the
  60  *   only entity that has enough information to make an accurate error string
  61  *   to report back to the user.
  62  * - the parent reads this message + rendered string in and uses ORTE
  63  *   reporting mechanisms to display it to the user
  64  * - if the problem was only a warning, the child continues processing
  65  *   (potentially eventually exec'ing the target executable).
  66  * - if the problem was an error, the child exits and the parent
  67  *   handles the death of the child as appropriate (i.e., this ODLS
  68  *   simply reports the error -- other things decide what to do).
  69  */
  70 
  71 #include "orte_config.h"
  72 #include "orte/constants.h"
  73 #include "orte/types.h"
  74 
  75 #include <string.h>
  76 #include <stdlib.h>
  77 #ifdef HAVE_UNISTD_H
  78 #include <unistd.h>
  79 #endif
  80 #include <errno.h>
  81 #ifdef HAVE_SYS_TYPES_H
  82 #include <sys/types.h>
  83 #endif
  84 #ifdef HAVE_SYS_WAIT_H
  85 #include <sys/wait.h>
  86 #endif
  87 #include <signal.h>
  88 #ifdef HAVE_FCNTL_H
  89 #include <fcntl.h>
  90 #endif
  91 #ifdef HAVE_SYS_TIME_H
  92 #include <sys/time.h>
  93 #endif
  94 #ifdef HAVE_SYS_PARAM_H
  95 #include <sys/param.h>
  96 #endif
  97 #ifdef HAVE_NETDB_H
  98 #include <netdb.h>
  99 #endif
 100 #include <stdlib.h>
 101 #ifdef HAVE_SYS_STAT_H
 102 #include <sys/stat.h>
 103 #endif  /* HAVE_SYS_STAT_H */
 104 #include <stdarg.h>
 105 #ifdef HAVE_SYS_SELECT_H
 106 #include <sys/select.h>
 107 #endif
 108 #ifdef HAVE_DIRENT_H
 109 #include <dirent.h>
 110 #endif
 111 #include <ctype.h>
 112 #ifdef HAVE_UTIL_H
 113 #include <util.h>
 114 #endif
 115 #ifdef HAVE_PTY_H
 116 #include <pty.h>
 117 #endif
 118 #ifdef HAVE_FCNTL_H
 119 #include <fcntl.h>
 120 #endif
 121 #ifdef HAVE_TERMIOS_H
 122 #include <termios.h>
 123 # ifdef HAVE_TERMIO_H
 124 #  include <termio.h>
 125 # endif
 126 #endif
 127 #ifdef HAVE_LIBUTIL_H
 128 #include <libutil.h>
 129 #endif
 130 
 131 #include <spawn.h>
 132 
 133 #include "opal/mca/hwloc/hwloc-internal.h"
 134 #include "opal/mca/hwloc/base/base.h"
 135 #include "opal/class/opal_pointer_array.h"
 136 #include "opal/util/opal_environ.h"
 137 #include "opal/util/show_help.h"
 138 #include "opal/util/sys_limits.h"
 139 #include "opal/util/fd.h"
 140 
 141 #include "orte/util/show_help.h"
 142 #include "orte/runtime/orte_wait.h"
 143 #include "orte/runtime/orte_globals.h"
 144 #include "orte/mca/errmgr/errmgr.h"
 145 #include "orte/mca/ess/ess.h"
 146 #include "orte/mca/iof/base/iof_base_setup.h"
 147 #include "orte/mca/plm/plm.h"
 148 #include "orte/mca/rtc/rtc.h"
 149 #include "orte/util/name_fns.h"
 150 #include "orte/util/threads.h"
 151 
 152 #include "orte/mca/odls/base/base.h"
 153 #include "orte/mca/odls/base/odls_private.h"
 154 #include "orte/mca/odls/pspawn/odls_pspawn.h"
 155 #include "orte/orted/pmix/pmix_server.h"
 156 
 157 /*
 158  * Module functions (function pointers used in a struct)
 159  */
 160 static int orte_odls_pspawn_launch_local_procs(opal_buffer_t *data);
 161 static int orte_odls_pspawn_kill_local_procs(opal_pointer_array_t *procs);
 162 static int orte_odls_pspawn_signal_local_procs(const orte_process_name_t *proc, int32_t signal);
 163 static int orte_odls_pspawn_restart_proc(orte_proc_t *child);
 164 
 165 
 166 /*
 167  * Module
 168  */
 169 orte_odls_base_module_t orte_odls_pspawn_module = {
 170     .get_add_procs_data = orte_odls_base_default_get_add_procs_data,
 171     .launch_local_procs = orte_odls_pspawn_launch_local_procs,
 172     .kill_local_procs = orte_odls_pspawn_kill_local_procs,
 173     .signal_local_procs = orte_odls_pspawn_signal_local_procs,
 174     .restart_proc = orte_odls_pspawn_restart_proc
 175 };
 176 
 177 
 178 /* deliver a signal to a specified pid. */
 179 static int odls_pspawn_kill_local(pid_t pid, int signum)
 180 {
 181     pid_t pgrp;
 182 
 183 #if HAVE_SETPGID
 184     pgrp = getpgid(pid);
 185     if (-1 != pgrp) {
 186         /* target the lead process of the process
 187          * group so we ensure that the signal is
 188          * seen by all members of that group. This
 189          * ensures that the signal is seen by any
 190          * child processes our child may have
 191          * started
 192          */
 193         pid = -pgrp;
 194     }
 195 #endif
 196 
 197     if (0 != kill(pid, signum)) {
 198         if (ESRCH != errno) {
 199             OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output,
 200                                  "%s odls:pspawn:SENT KILL %d TO PID %d GOT ERRNO %d",
 201                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), signum, (int)pid, errno));
 202             return errno;
 203         }
 204     }
 205     OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output,
 206                          "%s odls:pspawn:SENT KILL %d TO PID %d SUCCESS",
 207                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), signum, (int)pid));
 208     return 0;
 209 }
 210 
 211 int orte_odls_pspawn_kill_local_procs(opal_pointer_array_t *procs)
 212 {
 213     int rc;
 214 
 215     if (ORTE_SUCCESS != (rc = orte_odls_base_default_kill_local_procs(procs,
 216                                             odls_pspawn_kill_local))) {
 217         ORTE_ERROR_LOG(rc);
 218         return rc;
 219     }
 220     return ORTE_SUCCESS;
 221 }
 222 
 223 
 224 
 225 /* close all open file descriptors w/ exception of stdin/stdout/stderr
 226    and the pipe up to the parent. */
 227 static int close_open_file_descriptors(posix_spawn_file_actions_t *factions)
 228 {
 229     DIR *dir = opendir("/proc/self/fd");
 230     if (NULL == dir) {
 231         return ORTE_ERR_FILE_OPEN_FAILURE;
 232     }
 233     struct dirent *files;
 234 
 235     /* grab the fd of the opendir above so we don't close in the 
 236      * middle of the scan. */
 237     int dir_scan_fd = dirfd(dir);
 238     if(dir_scan_fd < 0 ) {
 239         return ORTE_ERR_FILE_OPEN_FAILURE;
 240     }
 241 
 242     while (NULL != (files = readdir(dir))) {
 243         if (!isdigit(files->d_name[0])) {
 244             continue;
 245         }
 246         int fd = strtol(files->d_name, NULL, 10);
 247         if (errno == EINVAL || errno == ERANGE) {
 248             closedir(dir);
 249             return ORTE_ERR_TYPE_MISMATCH;
 250         }
 251         if (fd >=3 && fd != dir_scan_fd) {
 252             posix_spawn_file_actions_addclose(factions, fd);
 253         }
 254     }
 255     closedir(dir);
 256     return ORTE_SUCCESS;
 257 }
 258 
 259 /**
 260  *  posix_spawn the specified processes
 261  */
 262 static int odls_pspawn_fork_local_proc(void *cdptr)
 263 {
 264     orte_odls_spawn_caddy_t *cd = (orte_odls_spawn_caddy_t*)cdptr;
 265     pid_t pid;
 266     orte_proc_t *child = cd->child;
 267     posix_spawn_file_actions_t factions;
 268     posix_spawnattr_t attrs;
 269     sigset_t sigs;
 270     int rc;
 271     orte_iof_base_io_conf_t *opts = &cd->opts;
 272 
 273     ORTE_FLAG_UNSET(cd->child, ORTE_PROC_FLAG_ALIVE);
 274 
 275     /* setup the attrs object */
 276     rc = posix_spawnattr_init(&attrs);
 277     if (0 != rc) {
 278         child->state = ORTE_PROC_STATE_FAILED_TO_START;
 279         child->exit_code = 1;
 280         return ORTE_ERROR;
 281     }
 282     /* set the signal mask in the child process */
 283     sigprocmask(0, 0, &sigs);
 284     sigprocmask(SIG_UNBLOCK, &sigs, 0);
 285     posix_spawnattr_setsigmask(&attrs, &sigs);
 286 
 287     /* setup to close all fd's other than stdin/out/err */
 288     rc = posix_spawn_file_actions_init(&factions);
 289     if (0 != rc) {
 290         posix_spawnattr_destroy(&attrs);
 291         child->state = ORTE_PROC_STATE_FAILED_TO_START;
 292         child->exit_code = 1;
 293         return ORTE_ERROR;
 294     }
 295     if (ORTE_SUCCESS != close_open_file_descriptors(&factions)) {
 296         posix_spawn_file_actions_destroy(&factions);
 297         posix_spawnattr_destroy(&attrs);
 298         child->state = ORTE_PROC_STATE_FAILED_TO_START;
 299         child->exit_code = 1;
 300         return ORTE_ERROR;
 301     }
 302     /* close the parent end of the pipes in the child */
 303     if (opts->connect_stdin) {
 304         posix_spawn_file_actions_addclose(&factions, opts->p_stdin[1]);
 305     }
 306     posix_spawn_file_actions_addclose(&factions, opts->p_stdout[0]);
 307     if( !orte_iof_base.redirect_app_stderr_to_stdout ) {
 308         posix_spawn_file_actions_addclose(&factions, opts->p_stderr[0]);
 309     }
 310     /* dup the stdin/stdout/stderr descriptors */
 311     if (opts->usepty) {
 312         /* disable echo */
 313         struct termios term_attrs;
 314         if (tcgetattr(opts->p_stdout[1], &term_attrs) < 0) {
 315             return ORTE_ERR_PIPE_SETUP_FAILURE;
 316         }
 317         term_attrs.c_lflag &= ~ (ECHO | ECHOE | ECHOK |
 318                                  ECHOCTL | ECHOKE | ECHONL);
 319         term_attrs.c_iflag &= ~ (ICRNL | INLCR | ISTRIP | INPCK | IXON);
 320         term_attrs.c_oflag &= ~ (
 321 #ifdef OCRNL
 322                                  /* OS X 10.3 does not have this
 323                                     value defined */
 324                                  OCRNL |
 325 #endif
 326                                  ONLCR);
 327         if (tcsetattr(opts->p_stdout[1], TCSANOW, &term_attrs) == -1) {
 328             return ORTE_ERR_PIPE_SETUP_FAILURE;
 329         }
 330         posix_spawn_file_actions_adddup2(&factions, fileno(stdout), opts->p_stdout[1]);
 331         if (orte_iof_base.redirect_app_stderr_to_stdout) {
 332             posix_spawn_file_actions_adddup2(&factions, fileno(stderr), opts->p_stdout[1]);
 333         }
 334     } else {
 335         if (opts->p_stdout[1] != fileno(stdout)) {
 336             posix_spawn_file_actions_adddup2(&factions, fileno(stdout), opts->p_stdout[1]);
 337         }
 338         if (orte_iof_base.redirect_app_stderr_to_stdout) {
 339             posix_spawn_file_actions_adddup2(&factions, fileno(stderr), opts->p_stdout[1]);
 340         }
 341     }
 342     if (opts->connect_stdin) {
 343         if (opts->p_stdin[0] != fileno(stdin)) {
 344             posix_spawn_file_actions_adddup2(&factions, fileno(stdin), opts->p_stdin[0]);
 345         }
 346     }
 347     if (opts->p_stderr[1] != fileno(stderr) && !orte_iof_base.redirect_app_stderr_to_stdout) {
 348         posix_spawn_file_actions_adddup2(&factions, fileno(stderr), opts->p_stderr[1]);
 349     }
 350 
 351     /* Fork off the child */
 352     rc = posix_spawn(&pid, cd->app->app, &factions, &attrs, cd->argv, cd->env);
 353     posix_spawn_file_actions_destroy(&factions);
 354     posix_spawnattr_destroy(&attrs);
 355 
 356     /* as the parent, close the other ends of the pipes */
 357     if (cd->opts.connect_stdin) {
 358         close(cd->opts.p_stdin[0]);
 359     }
 360     close(cd->opts.p_stdout[1]);
 361     if( !orte_iof_base.redirect_app_stderr_to_stdout ) {
 362         close(cd->opts.p_stderr[1]);
 363     }
 364 
 365     if (rc < 0) {
 366         ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN);
 367         child->state = ORTE_PROC_STATE_FAILED_TO_START;
 368         child->exit_code = ORTE_ERR_SYS_LIMITS_CHILDREN;
 369         return ORTE_ERR_SYS_LIMITS_CHILDREN;
 370     }
 371 
 372     cd->child->state = ORTE_PROC_STATE_RUNNING;
 373     cd->child->pid = pid;
 374     ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_ALIVE);
 375     return ORTE_SUCCESS;
 376 }
 377 
 378 
 379 /**
 380  * Launch all processes allocated to the current node.
 381  */
 382 
 383 int orte_odls_pspawn_launch_local_procs(opal_buffer_t *data)
 384 {
 385     int rc;
 386     orte_jobid_t job;
 387 
 388     /* construct the list of children we are to launch */
 389     if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job))) {
 390         OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output,
 391                              "%s odls:pspawn:launch:local failed to construct child list on error %s",
 392                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
 393         return rc;
 394     }
 395 
 396     /* launch the local procs */
 397     ORTE_ACTIVATE_LOCAL_LAUNCH(job, odls_pspawn_fork_local_proc);
 398 
 399     return ORTE_SUCCESS;
 400 }
 401 
 402 
 403 /**
 404  * Send a signal to a pid.  Note that if we get an error, we set the
 405  * return value and let the upper layer print out the message.
 406  */
 407 static int send_signal(pid_t pd, int signal)
 408 {
 409     int rc = ORTE_SUCCESS;
 410     pid_t pid;
 411 
 412     if (orte_odls_globals.signal_direct_children_only) {
 413         pid = pd;
 414     } else {
 415 #if HAVE_SETPGID
 416         /* send to the process group so that any children of our children
 417          * also receive the signal*/
 418         pid = -pd;
 419 #else
 420         pid = pd;
 421 #endif
 422     }
 423 
 424     OPAL_OUTPUT_VERBOSE((1, orte_odls_base_framework.framework_output,
 425                          "%s sending signal %d to pid %ld",
 426                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 427                          signal, (long)pid));
 428 
 429     if (kill(pid, signal) != 0) {
 430         switch(errno) {
 431             case EINVAL:
 432                 rc = ORTE_ERR_BAD_PARAM;
 433                 break;
 434             case ESRCH:
 435                 /* This case can occur when we deliver a signal to a
 436                    process that is no longer there.  This can happen if
 437                    we deliver a signal while the job is shutting down.
 438                    This does not indicate a real problem, so just
 439                    ignore the error.  */
 440                 break;
 441             case EPERM:
 442                 rc = ORTE_ERR_PERM;
 443                 break;
 444             default:
 445                 rc = ORTE_ERROR;
 446         }
 447     }
 448 
 449     return rc;
 450 }
 451 
 452 static int orte_odls_pspawn_signal_local_procs(const orte_process_name_t *proc, int32_t signal)
 453 {
 454     int rc;
 455 
 456     if (ORTE_SUCCESS != (rc = orte_odls_base_default_signal_local_procs(proc, signal, send_signal))) {
 457         ORTE_ERROR_LOG(rc);
 458         return rc;
 459     }
 460     return ORTE_SUCCESS;
 461 }
 462 
 463 static int orte_odls_pspawn_restart_proc(orte_proc_t *child)
 464 {
 465     int rc;
 466 
 467     /* restart the local proc */
 468     if (ORTE_SUCCESS != (rc = orte_odls_base_default_restart_proc(child, odls_pspawn_fork_local_proc))) {
 469         OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output,
 470                              "%s odls:pspawn:restart_proc failed to launch on error %s",
 471                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
 472     }
 473     return rc;
 474 }

/* [<][>][^][v][top][bottom][index][help] */