root/orte/mca/plm/lsf/plm_lsf_module.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. plm_lsf_init
  2. plm_lsf_launch_job
  3. launch_daemons
  4. plm_lsf_terminate_orteds
  5. plm_lsf_signal_job
  6. plm_lsf_finalize

   1 /*
   2  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2008 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2006-2007 Cisco Systems, Inc.  All rights reserved.
  13  * Copyright (c) 2007-2012 Los Alamos National Security, LLC.  All rights
  14  *                         reserved.
  15  * Copyright (c) 2008      Institut National de Recherche en Informatique
  16  *                         et Automatique. All rights reserved.
  17  * Copyright (c) 2014-2017 Intel, Inc.  All rights reserved.
  18  * Copyright (c) 2017      IBM Corporation.  All rights reserved.
  19  * $COPYRIGHT$
  20  *
  21  * Additional copyrights may follow
  22  *
  23  * $HEADER$
  24  *
  25  * These symbols are in a file by themselves to provide nice linker
  26  * semantics.  Since linkers generally pull in symbols by object
  27  * files, keeping these symbols as the only symbols in this file
  28  * prevents utility programs such as "ompi_info" from having to import
  29  * entire components just to query their version and parameters.
  30  */
  31 
  32 #include "orte_config.h"
  33 #include "orte/constants.h"
  34 #include "orte/types.h"
  35 
  36 #include <sys/types.h>
  37 #ifdef HAVE_UNISTD_H
  38 #include <unistd.h>
  39 #endif
  40 #include <signal.h>
  41 #include <stdlib.h>
  42 #ifdef HAVE_SYS_TYPES_H
  43 #include <sys/types.h>
  44 #endif
  45 #ifdef HAVE_SYS_TIME_H
  46 #include <sys/time.h>
  47 #endif
  48 #ifdef HAVE_SYS_STAT_H
  49 #include <sys/stat.h>
  50 #endif
  51 #ifdef HAVE_FCNTL_H
  52 #include <fcntl.h>
  53 #endif
  54 
  55 #define SR1_PJOBS
  56 #include <lsf/lsbatch.h>
  57 
  58 #include "opal/mca/base/base.h"
  59 #include "opal/mca/installdirs/installdirs.h"
  60 #include "opal/util/argv.h"
  61 #include "opal/util/output.h"
  62 #include "opal/util/opal_environ.h"
  63 
  64 #include "orte/util/show_help.h"
  65 #include "orte/runtime/orte_globals.h"
  66 #include "orte/runtime/orte_wait.h"
  67 #include "orte/mca/errmgr/errmgr.h"
  68 #include "orte/mca/rmaps/rmaps.h"
  69 #include "orte/mca/state/state.h"
  70 #include "orte/util/threads.h"
  71 
  72 #include "orte/mca/plm/plm.h"
  73 #include "orte/mca/plm/base/base.h"
  74 #include "orte/mca/plm/base/plm_private.h"
  75 #include "plm_lsf.h"
  76 
  77 
  78 /*
  79  * Local functions
  80  */
  81 static int plm_lsf_init(void);
  82 static int plm_lsf_launch_job(orte_job_t *jdata);
  83 static int plm_lsf_terminate_orteds(void);
  84 static int plm_lsf_signal_job(orte_jobid_t jobid, int32_t signal);
  85 static int plm_lsf_finalize(void);
  86 
  87 
  88 /*
  89  * Global variable
  90  */
  91 orte_plm_base_module_t orte_plm_lsf_module = {
  92     plm_lsf_init,
  93     orte_plm_base_set_hnp_name,
  94     plm_lsf_launch_job,
  95     NULL,
  96     orte_plm_base_orted_terminate_job,
  97     plm_lsf_terminate_orteds,
  98     orte_plm_base_orted_kill_local_procs,
  99     plm_lsf_signal_job,
 100     plm_lsf_finalize
 101 };
 102 
 103 static void launch_daemons(int fd, short args, void *cbdata);
 104 
 105 /**
 106  * Init the module
 107  */
 108 int plm_lsf_init(void)
 109 {
 110     int rc;
 111 
 112     if (ORTE_SUCCESS != (rc = orte_plm_base_comm_start())) {
 113         ORTE_ERROR_LOG(rc);
 114     }
 115 
 116     if (orte_do_not_launch) {
 117         /* must assign daemons as won't be launching them */
 118         orte_plm_globals.daemon_nodes_assigned_at_launch = true;
 119     } else {
 120         /* we do NOT assign daemons to nodes at launch - we will
 121          * determine that mapping when the daemon
 122          * calls back. This is required because lsf does
 123          * its own mapping of proc-to-node, and we cannot know
 124          * in advance which daemon will wind up on which node
 125          */
 126         orte_plm_globals.daemon_nodes_assigned_at_launch = false;
 127     }
 128 
 129     /* point to our launch command */
 130     if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_LAUNCH_DAEMONS,
 131                                                        launch_daemons, ORTE_SYS_PRI))) {
 132         ORTE_ERROR_LOG(rc);
 133         return rc;
 134     }
 135 
 136     return rc;
 137 }
 138 
 139 /* When working in this function, ALWAYS jump to "cleanup" if
 140  * you encounter an error so that orterun will be woken up and
 141  * the job can cleanly terminate
 142  */
 143 static int plm_lsf_launch_job(orte_job_t *jdata)
 144 {
 145     if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
 146         /* this is a restart situation - skip to the mapping stage */
 147         ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
 148     } else {
 149         /* new job - set it up */
 150         ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_INIT);
 151     }
 152     return ORTE_SUCCESS;
 153 }
 154 
 155 static void launch_daemons(int fd, short args, void *cbdata)
 156 {
 157     orte_job_map_t *map;
 158     size_t num_nodes;
 159     char *param;
 160     char **argv = NULL;
 161     int argc;
 162     int rc;
 163     char** env = NULL;
 164     char **nodelist_argv;
 165     int nodelist_argc;
 166     char *vpid_string;
 167     int i;
 168     char *cur_prefix;
 169     int proc_vpid_index = 0;
 170     bool failed_launch = true;
 171     orte_app_context_t *app;
 172     orte_node_t *node;
 173     orte_std_cntr_t nnode;
 174     orte_job_t *daemons;
 175     orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
 176     orte_job_t *jdata;
 177 
 178     ORTE_ACQUIRE_OBJECT(state);
 179     jdata  = state->jdata;
 180 
 181     /* start by setting up the virtual machine */
 182     daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
 183     if (ORTE_SUCCESS != (rc = orte_plm_base_setup_virtual_machine(jdata))) {
 184         ORTE_ERROR_LOG(rc);
 185         goto cleanup;
 186     }
 187 
 188     /* if we don't want to launch, then don't attempt to
 189      * launch the daemons - the user really wants to just
 190      * look at the proposed process map
 191      */
 192     if (orte_do_not_launch) {
 193         /* set the state to indicate the daemons reported - this
 194          * will trigger the daemons_reported event and cause the
 195          * job to move to the following step
 196          */
 197         state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
 198         ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
 199         OBJ_RELEASE(state);
 200         return;
 201     }
 202 
 203     OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
 204                          "%s plm:lsf: launching vm",
 205                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 206 
 207 
 208     /* Get the map for this job */
 209     if (NULL == (map = daemons->map)) {
 210         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 211         rc = ORTE_ERR_NOT_FOUND;
 212         goto cleanup;
 213     }
 214 
 215     num_nodes = map->num_new_daemons;
 216     if (0 == num_nodes) {
 217         /* set the state to indicate the daemons reported - this
 218          * will trigger the daemons_reported event and cause the
 219          * job to move to the following step
 220          */
 221         OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
 222                              "%s plm:lsf: no new daemons to launch",
 223                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 224         state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
 225         ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
 226         OBJ_RELEASE(state);
 227         return;
 228     }
 229 
 230     /* create nodelist */
 231     nodelist_argv = NULL;
 232     nodelist_argc = 0;
 233 
 234     for (nnode=0; nnode < map->nodes->size; nnode++) {
 235         if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) {
 236             continue;
 237         }
 238         /* if the daemon already exists on this node, then
 239          * don't include it
 240          */
 241         if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) {
 242             continue;
 243         }
 244 
 245         /* otherwise, add it to the list of nodes upon which
 246          * we need to launch a daemon
 247          */
 248         opal_argv_append(&nodelist_argc, &nodelist_argv, node->name);
 249     }
 250 
 251     /*
 252      * start building argv array
 253      */
 254     argv = NULL;
 255     argc = 0;
 256 
 257     /*
 258      * ORTED OPTIONS
 259      */
 260 
 261     /* add the daemon command (as specified by user) */
 262     orte_plm_base_setup_orted_cmd(&argc, &argv);
 263 
 264 
 265     /* Add basic orted command line options */
 266     orte_plm_base_orted_append_basic_args(&argc, &argv,
 267                                           "lsf",
 268                                           &proc_vpid_index);
 269 
 270     /* tell the new daemons the base of the name list so they can compute
 271      * their own name on the other end
 272      */
 273     rc = orte_util_convert_vpid_to_string(&vpid_string, map->daemon_vpid_start);
 274     if (ORTE_SUCCESS != rc) {
 275         opal_output(0, "plm_lsf: unable to get daemon vpid as string");
 276         goto cleanup;
 277     }
 278     free(argv[proc_vpid_index]);
 279     argv[proc_vpid_index] = strdup(vpid_string);
 280     free(vpid_string);
 281 
 282     /* protect the args in case someone has a script wrapper */
 283     mca_base_cmd_line_wrap_args(argv);
 284 
 285     if (0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
 286         param = opal_argv_join(argv, ' ');
 287         if (NULL != param) {
 288             opal_output(0, "plm:lsf: final top-level argv:");
 289             opal_output(0, "plm:lsf:     %s", param);
 290             free(param);
 291         }
 292     }
 293 
 294     /* Copy the prefix-directory specified in the
 295        corresponding app_context.  If there are multiple,
 296        different prefix's in the app context, complain (i.e., only
 297        allow one --prefix option for the entire lsf run -- we
 298        don't support different --prefix'es for different nodes in
 299        the LSF plm) */
 300     cur_prefix = NULL;
 301     for (i=0; i < jdata->apps->size; i++) {
 302         char *app_prefix_dir=NULL;
 303         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
 304             continue;
 305         }
 306         if (orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)&app_prefix_dir, OPAL_STRING) &&
 307             NULL != app_prefix_dir) {
 308             /* Check for already set cur_prefix_dir -- if different,
 309                complain */
 310             if (NULL != cur_prefix &&
 311                 0 != strcmp (cur_prefix, app_prefix_dir)) {
 312                 orte_show_help("help-plm-lsf.txt", "multiple-prefixes",
 313                                true, cur_prefix, app_prefix_dir);
 314                 rc = ORTE_ERR_FAILED_TO_START;
 315                 goto cleanup;
 316             }
 317 
 318             /* If not yet set, copy it; iff set, then it's the
 319                same anyway */
 320             if (NULL == cur_prefix) {
 321                 cur_prefix = strdup(app_prefix_dir);
 322                 OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
 323                                      "%s plm:lsf: Set prefix:%s",
 324                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cur_prefix));
 325             }
 326             free(app_prefix_dir);
 327         }
 328     }
 329 
 330     /* setup environment */
 331     env = opal_argv_copy(orte_launch_environ);
 332 
 333     /* lsb_launch tampers with SIGCHLD.
 334      * After the call to lsb_launch, the signal handler for SIGCHLD is NULL.
 335      * So, we disable the SIGCHLD handler of libevent for the duration of
 336      * the call to lsb_launch
 337      */
 338     orte_wait_disable();
 339 
 340     /* exec the daemon(s). Do NOT wait for lsb_launch to complete as
 341      * it only completes when the processes it starts - in this case,
 342      * the orteds - complete. We need to go ahead and return so
 343      * orterun can do the rest of its stuff. Instead, we'll catch any
 344      * failures and deal with them elsewhere
 345      */
 346     if ( (rc = lsb_launch(nodelist_argv, argv, LSF_DJOB_REPLACE_ENV | LSF_DJOB_NOWAIT, env)) < 0) {
 347         ORTE_ERROR_LOG(ORTE_ERR_FAILED_TO_START);
 348         char *flattened_nodelist = NULL;
 349         flattened_nodelist = opal_argv_join(nodelist_argv, '\n');
 350         orte_show_help("help-plm-lsf.txt", "lsb_launch-failed",
 351                        true, rc, lsberrno, lsb_sysmsg(),
 352                        opal_argv_count(nodelist_argv), flattened_nodelist);
 353         free(flattened_nodelist);
 354         rc = ORTE_ERR_FAILED_TO_START;
 355         orte_wait_enable();  /* re-enable our SIGCHLD handler */
 356         goto cleanup;
 357     }
 358     orte_wait_enable();  /* re-enable our SIGCHLD handler */
 359 
 360     /* indicate that the daemons for this job were launched */
 361     state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
 362     daemons->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
 363 
 364     /* flag that launch was successful, so far as we currently know */
 365     failed_launch = false;
 366 
 367  cleanup:
 368     if (NULL != argv) {
 369         opal_argv_free(argv);
 370     }
 371     if (NULL != env) {
 372         opal_argv_free(env);
 373     }
 374 
 375     /* cleanup the caddy */
 376     OBJ_RELEASE(state);
 377 
 378     /* check for failed launch - if so, force terminate */
 379     if (failed_launch) {
 380         ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
 381     }
 382 }
 383 
 384 
 385 /**
 386 * Terminate the orteds for a given job
 387  */
 388 static int plm_lsf_terminate_orteds(void)
 389 {
 390     int rc;
 391 
 392     if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
 393         ORTE_ERROR_LOG(rc);
 394     }
 395 
 396     return rc;
 397 }
 398 
 399 
 400 /**
 401  * Signal all the processes in the job
 402  */
 403 static int plm_lsf_signal_job(orte_jobid_t jobid, int32_t signal)
 404 {
 405     int rc;
 406 
 407     /* order the orteds to pass this signal to their local procs */
 408     if (ORTE_SUCCESS != (rc = orte_plm_base_orted_signal_local_procs(jobid, signal))) {
 409         ORTE_ERROR_LOG(rc);
 410     }
 411     return rc;
 412 }
 413 
 414 
 415 static int plm_lsf_finalize(void)
 416 {
 417     int rc;
 418 
 419     /* cleanup any pending recvs */
 420     if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) {
 421         ORTE_ERROR_LOG(rc);
 422     }
 423 
 424     return ORTE_SUCCESS;
 425 }

/* [<][>][^][v][top][bottom][index][help] */