This source file includes the following definitions.
- MPIR_Breakpoint
- tcon
- tdes
- orte_submit_init
- print_help
- orte_submit_finalize
- orte_submit_cancel
- orte_submit_halt
- orte_submit_job
- init_globals
- parse_globals
- parse_locals
- create_app
- set_classpath_jar_file
- parse_appfile
- launch_recv
- complete_recv
- orte_debugger_init_before_spawn
- _send_notification
- orte_debugger_dump
- setup_debugger_job
- orte_debugger_init_after_spawn
- process
- open_fifo
- attach_debugger
- build_debugger_args
- run_debugger
- orte_debugger_detached
- stack_trace_recv
- stack_trace_timeout
- orte_timeout_wakeup
- profile_timeout
- profile_recv
- orte_profile_wakeup
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 
  26 
  27 
  28 #include "orte_config.h"
  29 #include "orte/constants.h"
  30 
  31 #include <string.h>
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #ifdef HAVE_STRINGS_H
  35 #include <strings.h>
  36 #endif  
  37 #ifdef HAVE_UNISTD_H
  38 #include <unistd.h>
  39 #endif
  40 #ifdef HAVE_SYS_PARAM_H
  41 #include <sys/param.h>
  42 #endif
  43 #include <errno.h>
  44 #include <signal.h>
  45 #include <ctype.h>
  46 #ifdef HAVE_SYS_TYPES_H
  47 #include <sys/types.h>
  48 #endif  
  49 #ifdef HAVE_SYS_WAIT_H
  50 #include <sys/wait.h>
  51 #endif  
  52 #ifdef HAVE_SYS_TIME_H
  53 #include <sys/time.h>
  54 #endif  
  55 #include <fcntl.h>
  56 #ifdef HAVE_SYS_STAT_H
  57 #include <sys/stat.h>
  58 #endif
  59 #include <poll.h>
  60 
  61 #include "opal/dss/dss.h"
  62 #include "opal/mca/event/event.h"
  63 #include "opal/mca/installdirs/installdirs.h"
  64 #include "opal/mca/hwloc/base/base.h"
  65 #include "opal/mca/base/base.h"
  66 #include "opal/mca/pmix/pmix.h"
  67 #include "opal/util/argv.h"
  68 #include "opal/util/output.h"
  69 #include "opal/util/basename.h"
  70 #include "opal/util/cmd_line.h"
  71 #include "opal/util/opal_environ.h"
  72 #include "opal/util/opal_getcwd.h"
  73 #include "opal/util/show_help.h"
  74 #include "opal/util/fd.h"
  75 #include "opal/util/string_copy.h"
  76 #include "opal/sys/atomic.h"
  77 #if OPAL_ENABLE_FT_CR == 1
  78 #include "opal/runtime/opal_cr.h"
  79 #endif
  80 
  81 #include "opal/version.h"
  82 #include "opal/runtime/opal.h"
  83 #include "opal/runtime/opal_info_support.h"
  84 #include "opal/util/os_path.h"
  85 #include "opal/util/path.h"
  86 #include "opal/class/opal_pointer_array.h"
  87 #include "opal/dss/dss.h"
  88 
  89 #include "orte/mca/odls/odls_types.h"
  90 #include "orte/mca/plm/plm.h"
  91 #include "orte/mca/rmaps/rmaps_types.h"
  92 #include "orte/mca/rmaps/base/base.h"
  93 
  94 #include "orte/mca/errmgr/errmgr.h"
  95 #include "orte/mca/grpcomm/grpcomm.h"
  96 #include "orte/mca/oob/base/base.h"
  97 #include "orte/mca/plm/base/plm_private.h"
  98 #include "orte/mca/rml/rml.h"
  99 #include "orte/mca/rml/base/rml_contact.h"
 100 #include "orte/mca/routed/routed.h"
 101 #include "orte/mca/schizo/base/base.h"
 102 #include "orte/mca/state/state.h"
 103 
 104 #include "orte/runtime/runtime.h"
 105 #include "orte/runtime/orte_globals.h"
 106 #include "orte/runtime/orte_wait.h"
 107 #include "orte/runtime/orte_quit.h"
 108 #include "orte/util/pre_condition_transports.h"
 109 #include "orte/util/show_help.h"
 110 
 111 #include "orted_submit.h"
 112 
 113 
 114 
 115 
/* Parsed command-line option values; zeroed at the start of orte_submit_init(). */
orte_cmd_options_t orte_cmd_options = {0};
/* Command-line parser object; created in orte_submit_init() and released
 * in orte_submit_finalize(). */
opal_cmd_line_t *orte_cmd_line = NULL;

/* File-private state. NOTE(review): global_mca_env, total_num_apps,
 * timeout_seconds and orte_memprofile_timeout are not touched in the part of
 * the file reviewed here - confirm their use against the later functions. */
static char **global_mca_env = NULL;
static orte_std_cntr_t total_num_apps = 0;
/* Build-time default; orte_submit_init() forces it to true once it has
 * entered its prefix-resolution branch. */
static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT;
/* Array of trackr_t objects, one per submitted job; the array slot is the
 * index handed back to callers of orte_submit_job(). */
static opal_pointer_array_t tool_jobs;
static int timeout_seconds;
static orte_timer_t *orte_memprofile_timeout;

/* Debugger attach FIFO state; torn down in orte_submit_finalize(), where a
 * descriptor value >= 0 is treated as "open". */
int orte_debugger_attach_fd = -1;
bool orte_debugger_fifo_active=false;
opal_event_t *orte_debugger_attach=NULL;
 130 
 131 
 132 
/*
 * Forward declarations of file-private helpers. Except for print_help(),
 * their definitions lie beyond the part of the file reviewed here.
 */
static int create_app(int argc, char* argv[],
                      orte_job_t *jdata,
                      orte_app_context_t **app,
                      bool *made_app, char ***app_env);
static int init_globals(void);
static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line);
static int parse_locals(orte_job_t *jdata, int argc, char* argv[]);
static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jarfile);
static int parse_appfile(orte_job_t *jdata, char *filename, char ***env);
/* Timer/event style callbacks (sd, args, cbdata signature). */
static void orte_timeout_wakeup(int sd, short args, void *cbdata);
static void orte_profile_wakeup(int sd, short args, void *cbdata);
/* RML receive callbacks; launch_recv and complete_recv are registered as
 * persistent receives in orte_submit_init() when running as a tool. */
static void profile_recv(int status, orte_process_name_t* sender,
                         opal_buffer_t *buffer, orte_rml_tag_t tag,
                         void* cbdata);
static void launch_recv(int status, orte_process_name_t* sender,
                        opal_buffer_t *buffer,
                        orte_rml_tag_t tag, void *cbdata);
static void complete_recv(int status, orte_process_name_t* sender,
                          opal_buffer_t *buffer,
                          orte_rml_tag_t tag, void *cbdata);
/* Debugger support helpers. */
static void attach_debugger(int fd, short event, void *arg);
static void build_debugger_args(orte_app_context_t *debugger);
static void open_fifo (void);
static void run_debugger(char *basename, opal_cmd_line_t *cmd_line,
                         int argc, char *argv[], int num_procs);
/* Prints the usage message built from orte_cmd_line. */
static void print_help(void);
 159 
 160 
/*
 * MPIR_* symbols. NOTE(review): the names match the MPIR process-acquisition
 * debugger interface; their consumers are outside the part of the file
 * reviewed here - confirm semantics against that specification.
 */
/* Capacities of the fixed-size MPIR character buffers below. */
#define MPIR_MAX_PATH_LENGTH 512
#define MPIR_MAX_ARG_LENGTH 1024
struct MPIR_PROCDESC *MPIR_proctable = NULL;
int MPIR_proctable_size = 0;
volatile int MPIR_being_debugged = 0;
volatile int MPIR_debug_state = 0;
int MPIR_i_am_starter = 0;
int MPIR_partial_attach_ok = 1;
char MPIR_executable_path[MPIR_MAX_PATH_LENGTH] = {0};
char MPIR_server_arguments[MPIR_MAX_ARG_LENGTH] = {0};
volatile int MPIR_forward_output = 0;
volatile int MPIR_forward_comm = 0;
/* Path of the attach FIFO; unlinked in orte_submit_finalize(). */
char MPIR_attach_fifo[MPIR_MAX_PATH_LENGTH] = {0};
int MPIR_force_to_main = 0;
static void orte_debugger_init_before_spawn(orte_job_t *jdata);

/* Exported and marked with the project's "optnone" attribute macro. */
ORTE_DECLSPEC void* __opal_attribute_optnone__ MPIR_Breakpoint(void);
 178 
 179 
 180 
 181 
 182 void* MPIR_Breakpoint(void)
 183 {
 184     return NULL;
 185 }
 186 
 187 
 188 typedef struct {
 189     opal_object_t super;
 190     orte_job_t *jdata;
 191     int index;
 192     orte_submit_cbfunc_t launch_cb;
 193     void *launch_cbdata;
 194     orte_submit_cbfunc_t complete_cb;
 195     void *complete_cbdata;
 196 } trackr_t;
 197 static void tcon(trackr_t *p)
 198 {
 199     p->jdata = NULL;
 200     p->launch_cb = NULL;
 201     p->launch_cbdata = NULL;
 202     p->complete_cb = NULL;
 203     p->complete_cbdata = NULL;
 204 }
 205 static void tdes(trackr_t *p)
 206 {
 207     if (NULL != p->jdata) {
 208         OBJ_RELEASE(p->jdata);
 209     }
 210 }
 211 static OBJ_CLASS_INSTANCE(trackr_t,
 212                           opal_object_t,
 213                           tcon, tdes);
 214 
 215 int orte_submit_init(int argc, char *argv[],
 216                      opal_cmd_line_init_t *opts)
 217 {
 218     int rc, i;
 219     char *param;
 220 
 221     
 222     memset(&orte_cmd_options, 0, sizeof(orte_cmd_options));
 223 
 224     
 225 
 226     orte_basename = opal_basename(argv[0]);
 227 
 228     
 229     for (i=0; NULL != argv[i]; i++) {
 230         if (':' == argv[i][0] ||
 231             NULL == argv[i+1] || NULL == argv[i+2]) {
 232             break;
 233         }
 234         if (0 == strncmp(argv[i], "-"OPAL_MCA_CMD_LINE_ID, strlen("-"OPAL_MCA_CMD_LINE_ID)) ||
 235             0 == strncmp(argv[i], "--"OPAL_MCA_CMD_LINE_ID, strlen("--"OPAL_MCA_CMD_LINE_ID)) ||
 236             0 == strncmp(argv[i], "-g"OPAL_MCA_CMD_LINE_ID, strlen("-g"OPAL_MCA_CMD_LINE_ID)) ||
 237             0 == strncmp(argv[i], "--g"OPAL_MCA_CMD_LINE_ID, strlen("--g"OPAL_MCA_CMD_LINE_ID))) {
 238             (void) mca_base_var_env_name (argv[i+1], ¶m);
 239             opal_setenv(param, argv[i+2], true, &environ);
 240             free(param);
 241         } else if (0 == strcmp(argv[i], "-am") ||
 242                    0 == strcmp(argv[i], "--am")) {
 243             (void)mca_base_var_env_name("mca_base_param_file_prefix", ¶m);
 244             opal_setenv(param, argv[i+1], true, &environ);
 245             free(param);
 246         } else if (0 == strcmp(argv[i], "-tune") ||
 247                    0 == strcmp(argv[i], "--tune")) {
 248             (void)mca_base_var_env_name("mca_base_envar_file_prefix", ¶m);
 249             opal_setenv(param, argv[i+1], true, &environ);
 250             free(param);
 251         }
 252     }
 253 
 254     
 255     if (OPAL_SUCCESS != (rc = opal_init_util(&argc, &argv))) {
 256         return rc;
 257     }
 258     
 259     if (ORTE_SUCCESS != (rc = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
 260         ORTE_ERROR_LOG(rc);
 261         return rc;
 262     }
 263     if (ORTE_SUCCESS != (rc = orte_schizo_base_select())) {
 264         ORTE_ERROR_LOG(rc);
 265         return rc;
 266     }
 267 
 268     OBJ_CONSTRUCT(&tool_jobs, opal_pointer_array_t);
 269     opal_pointer_array_init(&tool_jobs, 256, INT_MAX, 128);
 270 
 271 
 272     
 273     orte_cmd_line = OBJ_NEW(opal_cmd_line_t);
 274 
 275     
 276     if (NULL != opts) {
 277         if (OPAL_SUCCESS != (rc = opal_cmd_line_add(orte_cmd_line, opts))) {
 278             return rc;
 279         }
 280     }
 281 
 282     
 283     if (OPAL_SUCCESS != (rc = orte_schizo.define_cli(orte_cmd_line))) {
 284         return rc;
 285     }
 286 
 287     
 288     mca_base_cmd_line_setup(orte_cmd_line);
 289 
 290     
 291     if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(orte_cmd_line,
 292                                                   true, false, argc, argv)) ) {
 293         if (OPAL_ERR_SILENT != rc) {
 294             fprintf(stderr, "%s: command line error (%s)\n", argv[0],
 295                     opal_strerror(rc));
 296         }
 297         return rc;
 298     }
 299 
 300     
 301 
 302 
 303      if (orte_cmd_options.version) {
 304         char *str, *project_name = NULL;
 305         if (0 == strcmp(orte_basename, "mpirun")) {
 306             project_name = "Open MPI";
 307         } else {
 308             project_name = "OpenRTE";
 309         }
 310         str = opal_info_make_version_str("all",
 311                                          OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
 312                                          OPAL_RELEASE_VERSION,
 313                                          OPAL_GREEK_VERSION,
 314                                          OPAL_REPO_REV);
 315         if (NULL != str) {
 316             fprintf(stdout, "%s (%s) %s\n\nReport bugs to %s\n",
 317                     orte_basename, project_name, str, PACKAGE_BUGREPORT);
 318             free(str);
 319         }
 320         exit(0);
 321     }
 322 
 323     
 324 
 325 
 326 
 327     if (0 == geteuid() && !orte_cmd_options.run_as_root) {
 328         
 329         char *r1, *r2;
 330         if (NULL != (r1 = getenv("OMPI_ALLOW_RUN_AS_ROOT")) &&
 331             NULL != (r2 = getenv("OMPI_ALLOW_RUN_AS_ROOT_CONFIRM"))) {
 332             if (0 == strcmp(r1, "1") && 0 == strcmp(r2, "1")) {
 333                 goto moveon;
 334             }
 335         }
 336         
 337         fprintf(stderr, "--------------------------------------------------------------------------\n");
 338         if (orte_cmd_options.help) {
 339             fprintf(stderr, "%s cannot provide the help message when run as root.\n\n", orte_basename);
 340         } else {
 341             fprintf(stderr, "%s has detected an attempt to run as root.\n\n", orte_basename);
 342         }
 343 
 344         fprintf(stderr, "Running as root is *strongly* discouraged as any mistake (e.g., in\n");
 345         fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n");
 346         fprintf(stderr, "file system, leaving your system in an unusable state.\n\n");
 347 
 348         fprintf(stderr, "We strongly suggest that you run %s as a non-root user.\n\n", orte_basename);
 349 
 350         fprintf(stderr, "You can override this protection by adding the --allow-run-as-root option\n");
 351         fprintf(stderr, "to the cmd line or by setting two environment variables in the following way:\n");
 352         fprintf(stderr, "the variable OMPI_ALLOW_RUN_AS_ROOT=1 to indicate the desire to override this\n");
 353         fprintf(stderr, "protection, and OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 to confirm the choice and\n");
 354         fprintf(stderr, "add one more layer of certainty that you want to do so.\n");
 355         fprintf(stderr, "We reiterate our advice against doing so - please proceed at your own risk.\n");
 356         fprintf(stderr, "--------------------------------------------------------------------------\n");
 357         exit(1);
 358     }
 359 
 360   moveon:
 361     
 362     rc = mca_base_cmd_line_process_args(orte_cmd_line, &environ, &environ);
 363     if (ORTE_SUCCESS != rc) {
 364         return rc;
 365     }
 366 
 367     
 368     if (OPAL_SUCCESS != (rc = opal_init(&argc, &argv))) {
 369         return rc;
 370     }
 371 
 372     
 373     if (NULL != orte_cmd_options.help) {
 374         print_help();
 375 
 376         
 377         exit(0);
 378     }
 379 
 380     
 381     if (ORTE_PROC_TYPE_NONE == orte_process_info.proc_type) {
 382    
 383 
 384         if (NULL == orte_cmd_options.hnp) {
 385             orte_process_info.proc_type = ORTE_PROC_HNP;
 386         } else {
 387             orte_process_info.proc_type = ORTE_PROC_TOOL;
 388         }
 389     }
 390     if (ORTE_PROC_IS_TOOL) {
 391         if (0 == strncasecmp(orte_cmd_options.hnp, "file", strlen("file"))) {
 392             char input[1024], *filename;
 393             FILE *fp;
 394 
 395             
 396             filename = strchr(orte_cmd_options.hnp, ':');
 397             if (NULL == filename) {
 398                 
 399                 orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", orte_cmd_options.hnp);
 400                 exit(1);
 401             }
 402             ++filename; 
 403 
 404             if (0 >= strlen(filename)) {
 405                 
 406                 orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", orte_cmd_options.hnp);
 407                 exit(1);
 408             }
 409 
 410             
 411             fp = fopen(filename, "r");
 412             if (NULL == fp) { 
 413                 orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-access", true, orte_cmd_options.hnp);
 414                 exit(1);
 415             }
 416             
 417 
 418             memset(input, 0, 1024);
 419             if (NULL == fgets(input, 1024, fp)) {
 420                 
 421                 fclose(fp);
 422                 orte_show_help("help-orte-top.txt", "orte-top:hnp-file-bad", true, orte_cmd_options.hnp);
 423                 exit(1);
 424             }
 425             fclose(fp);
 426             input[strlen(input)-1] = '\0';  
 427             
 428             opal_setenv(OPAL_MCA_PREFIX"orte_hnp_uri", input, true, &environ);
 429         } else {
 430             
 431             opal_setenv(OPAL_MCA_PREFIX"orte_hnp_uri", orte_cmd_options.hnp, true, &environ);
 432         }
 433         
 434 
 435         opal_setenv(OPAL_MCA_PREFIX"ess", "tool", true, &environ);
 436     } else {
 437         
 438 
 439 
 440 
 441 
 442 
 443 
 444 
 445 
 446 
 447         orte_cmd_options.prefix = NULL;
 448         orte_cmd_options.path_to_mpirun = NULL;
 449         if (opal_cmd_line_is_taken(orte_cmd_line, "prefix") ||
 450             '/' == argv[0][0] || want_prefix_by_default) {
 451             size_t param_len;
 452             if ('/' == argv[0][0]) {
 453                 char* tmp_basename = NULL;
 454                 
 455 
 456                 orte_cmd_options.path_to_mpirun = opal_dirname(argv[0]);
 457                 
 458 
 459 
 460 
 461                 tmp_basename = opal_basename(orte_cmd_options.path_to_mpirun);
 462                 if (0 == strcmp("bin", tmp_basename)) {
 463                     char* tmp = orte_cmd_options.path_to_mpirun;
 464                     orte_cmd_options.path_to_mpirun = opal_dirname(tmp);
 465                     free(tmp);
 466                 } else {
 467                     free(orte_cmd_options.path_to_mpirun);
 468                     orte_cmd_options.path_to_mpirun = NULL;
 469                 }
 470                 free(tmp_basename);
 471             }
 472             
 473             if (opal_cmd_line_is_taken(orte_cmd_line, "prefix") &&
 474                 NULL != orte_cmd_options.path_to_mpirun) {
 475                 char *tmp_basename;
 476                 
 477                 param = strdup(opal_cmd_line_get_param(orte_cmd_line, "prefix", 0, 0));
 478                 
 479                 if (0 == strcmp(OPAL_PATH_SEP, &(param[strlen(param)-1]))) {
 480                     param[strlen(param)-1] = '\0';
 481                 }
 482                 tmp_basename = strdup(orte_cmd_options.path_to_mpirun);
 483                 if (0 == strcmp(OPAL_PATH_SEP, &(tmp_basename[strlen(tmp_basename)-1]))) {
 484                     tmp_basename[strlen(tmp_basename)-1] = '\0';
 485                 }
 486                 if (0 != strcmp(param, tmp_basename)) {
 487                     orte_show_help("help-orterun.txt", "orterun:double-prefix",
 488                                    true, orte_basename, orte_basename,
 489                                    param, tmp_basename, orte_basename);
 490                     
 491 
 492 
 493 
 494                     free(orte_cmd_options.path_to_mpirun);
 495                     orte_cmd_options.path_to_mpirun = NULL;
 496                 }
 497                 free(tmp_basename);
 498             } else if (NULL != orte_cmd_options.path_to_mpirun) {
 499                 param = strdup(orte_cmd_options.path_to_mpirun);
 500             } else if (opal_cmd_line_is_taken(orte_cmd_line, "prefix")){
 501                 
 502                 param = strdup(opal_cmd_line_get_param(orte_cmd_line, "prefix", 0, 0));
 503             } else {
 504                 
 505                 param = strdup(opal_install_dirs.prefix);
 506             }
 507 
 508             if (NULL != param) {
 509                 
 510                 param_len = strlen(param);
 511                 while (0 == strcmp (OPAL_PATH_SEP, &(param[param_len-1]))) {
 512                     param[param_len-1] = '\0';
 513                     param_len--;
 514                     if (0 == param_len) {
 515                         orte_show_help("help-orterun.txt", "orterun:empty-prefix",
 516                                        true, orte_basename, orte_basename);
 517                         free(param);
 518                         return ORTE_ERR_FATAL;
 519                     }
 520                 }
 521 
 522                 orte_cmd_options.prefix = param;
 523             }
 524             want_prefix_by_default = true;
 525         }
 526     }
 527 
 528     
 529     orte_register_params();
 530 
 531     if (orte_cmd_options.debug) {
 532         orte_devel_level_output = true;
 533     }
 534 
 535     
 536 
 537 
 538 
 539 
 540 
 541     if (ORTE_SUCCESS != (rc = orte_init(&argc, &argv,
 542                                         orte_process_info.proc_type))) {
 543         
 544 
 545 
 546         return rc;
 547     }
 548     
 549 
 550 
 551     opal_finalize();
 552 
 553     if (ORTE_PROC_IS_TOOL) {
 554         opal_value_t val;
 555         
 556         if (ORTE_SUCCESS != orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, ORTE_PROC_MY_HNP, NULL)) {
 557             orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
 558             exit(1);
 559         }
 560         
 561         OBJ_CONSTRUCT(&val, opal_value_t);
 562         val.key = OPAL_PMIX_PROC_URI;
 563         val.type = OPAL_STRING;
 564         val.data.string = orte_process_info.my_hnp_uri;
 565         if (OPAL_SUCCESS != opal_pmix.store_local(ORTE_PROC_MY_HNP, &val)) {
 566             val.key = NULL;
 567             val.data.string = NULL;
 568             OBJ_DESTRUCT(&val);
 569             orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
 570             orte_finalize();
 571             exit(1);
 572         }
 573         val.key = NULL;
 574         val.data.string = NULL;
 575         OBJ_DESTRUCT(&val);
 576 
 577         
 578         if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
 579             orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
 580             orte_finalize();
 581             exit(1);
 582         }
 583 
 584         
 585         orte_routed.set_lifeline(ORTE_PROC_MY_HNP);
 586 
 587         
 588         orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFY_COMPLETE,
 589                                 ORTE_RML_PERSISTENT, complete_recv, NULL);
 590         orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_LAUNCH_RESP,
 591                                 ORTE_RML_PERSISTENT, launch_recv, NULL);
 592     } else {
 593         
 594 
 595 
 596 
 597 
 598 
 599         orte_launch_environ = opal_argv_copy(environ);
 600         
 601 
 602         opal_unsetenv(OPAL_MCA_PREFIX"ess", &orte_launch_environ);
 603         opal_unsetenv(OPAL_MCA_PREFIX"pmix", &orte_launch_environ);
 604     }
 605 
 606     return ORTE_SUCCESS;
 607 }
 608 
 609 static void print_help()
 610 {
 611     char *str = NULL, *args;
 612     char *project_name = NULL;
 613 
 614     if (0 == strcmp(orte_basename, "mpirun")) {
 615         project_name = "Open MPI";
 616     } else {
 617         project_name = "OpenRTE";
 618     }
 619     args = opal_cmd_line_get_usage_msg(orte_cmd_line);
 620     str = opal_show_help_string("help-orterun.txt", "orterun:usage", false,
 621                                  orte_basename, project_name, OPAL_VERSION,
 622                                  orte_basename, args,
 623                                  PACKAGE_BUGREPORT);
 624     if (NULL != str) {
 625         printf("%s", str);
 626         free(str);
 627     }
 628     free(args);
 629 }
 630 
 631 void orte_submit_finalize(void)
 632 {
 633     trackr_t *trk;
 634     int i, rc;
 635 
 636     for (i=0; i < tool_jobs.size; i++) {
 637         if (NULL != (trk = (trackr_t*)opal_pointer_array_get_item(&tool_jobs, i))) {
 638             OBJ_RELEASE(trk);
 639         }
 640     }
 641     OBJ_DESTRUCT(&tool_jobs);
 642 
 643     
 644     if (ORTE_SUCCESS != (rc = mca_base_framework_close(&orte_schizo_base_framework))) {
 645         ORTE_ERROR_LOG(rc);
 646         return;
 647     }
 648 
 649     
 650     if (OPAL_SUCCESS != (rc = opal_finalize_util())) {
 651         return;
 652     }
 653 
 654     
 655     if (NULL != orte_cmd_line) {
 656         OBJ_RELEASE(orte_cmd_line);
 657     }
 658 
 659     
 660     if (0 <= orte_debugger_attach_fd) {
 661         if (orte_debugger_fifo_active) {
 662             opal_event_del(orte_debugger_attach);
 663             free(orte_debugger_attach);
 664         }
 665         close(orte_debugger_attach_fd);
 666         unlink(MPIR_attach_fifo);
 667     }
 668 
 669     if (NULL != orte_cmd_options.prefix) {
 670         free(orte_cmd_options.prefix);
 671     }
 672     if (NULL != orte_launch_environ) {
 673         opal_argv_free(orte_launch_environ);
 674     }
 675     if (NULL != orte_basename) {
 676         free(orte_basename);
 677     }
 678 }
 679 
 680 int orte_submit_cancel(int index) {
 681 
 682     int rc;
 683     trackr_t *trk;
 684     opal_buffer_t *req;
 685     orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_TERMINATE_JOB_CMD;
 686 
 687     
 688     if (NULL == (trk = (trackr_t*)opal_pointer_array_get_item(&tool_jobs, index))) {
 689         opal_output(0, "TRACKER ID %d RETURNED INDEX TO NULL OBJECT", index);
 690         return ORTE_ERROR;
 691     }
 692 
 693     
 694     req = OBJ_NEW(opal_buffer_t);
 695     if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &cmd, 1, ORTE_DAEMON_CMD))) {
 696         ORTE_ERROR_LOG(rc);
 697         return rc;
 698     }
 699     if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &trk->jdata->jobid, 1, ORTE_JOBID))) {
 700         ORTE_ERROR_LOG(rc);
 701         return rc;
 702     }
 703     rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON,
 704                                  orte_rml_send_callback, NULL);
 705     if (ORTE_SUCCESS != rc) {
 706         ORTE_ERROR_LOG(rc);
 707         OBJ_RELEASE(req);
 708         return rc;
 709     }
 710 
 711     return ORTE_ERR_OP_IN_PROGRESS;
 712 }
 713 
 714 
 715 int orte_submit_halt(void)
 716 {
 717     int rc;
 718     opal_buffer_t *req;
 719     orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_HALT_DVM_CMD;
 720 
 721     req = OBJ_NEW(opal_buffer_t);
 722     if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &cmd, 1, ORTE_DAEMON_CMD))) {
 723         ORTE_ERROR_LOG(rc);
 724         return rc;
 725     }
 726     rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req,
 727                                  ORTE_RML_TAG_DAEMON,
 728                                  orte_rml_send_callback, NULL);
 729     if (ORTE_SUCCESS != rc) {
 730         ORTE_ERROR_LOG(rc);
 731         OBJ_RELEASE(req);
 732         return rc;
 733     }
 734 
 735     return ORTE_ERR_OP_IN_PROGRESS;
 736 }
 737 
 738 
 739 
 740 
 741 int orte_submit_job(char *argv[], int *index,
 742                     orte_submit_cbfunc_t launch_cb,
 743                     void *launch_cbdata,
 744                     orte_submit_cbfunc_t complete_cb,
 745                     void *complete_cbdata)
 746 {
 747     opal_buffer_t *req;
 748     int rc, n;
 749     orte_app_idx_t i;
 750     orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_SPAWN_JOB_CMD;
 751     char *param;
 752     orte_job_t *jdata = NULL, *daemons;
 753     orte_app_context_t *app, *dapp;
 754     trackr_t *trk;
 755     int argc;
 756 
 757     
 758     if (NULL != getenv("OMPI_UNIVERSE_SIZE")) {
 759         fprintf(stderr, "\n\n**********************************************************\n\n");
 760         fprintf(stderr, "%s does not support recursive calls\n", orte_basename);
 761         fprintf(stderr, "\n**********************************************************\n");
 762         return ORTE_ERR_FATAL;
 763     }
 764 
 765     
 766 
 767     init_globals();
 768 
 769     argc = opal_argv_count(argv);
 770 
 771     
 772 
 773     if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(orte_cmd_line, true, false,
 774                                                   argc, argv)) ) {
 775         if (OPAL_ERR_SILENT != rc) {
 776             fprintf(stderr, "%s: command line error (%s)\n", argv[0],
 777                     opal_strerror(rc));
 778         }
 779         return rc;
 780     }
 781 
 782     
 783     parse_globals(argc, argv, orte_cmd_line);
 784 
 785     
 786 
 787 
 788 
 789     jdata = OBJ_NEW(orte_job_t);
 790     if (NULL == jdata) {
 791         
 792 
 793 
 794         return ORTE_ERR_OUT_OF_RESOURCE;
 795     }
 796     
 797     if (NULL != orte_cmd_options.personality) {
 798         jdata->personality = opal_argv_split(orte_cmd_options.personality, ',');
 799     } else {
 800         
 801         opal_argv_append_nosize(&jdata->personality, "ompi");
 802     }
 803 
 804     trk = OBJ_NEW(trackr_t);
 805     trk->jdata = jdata;
 806     trk->launch_cb = launch_cb;
 807     trk->launch_cbdata = launch_cbdata;
 808     trk->complete_cb = complete_cb;
 809     trk->complete_cbdata = complete_cbdata;
 810     trk->index = opal_pointer_array_add(&tool_jobs, trk);
 811 
 812 
 813     
 814     orte_set_attribute(&jdata->attributes, ORTE_JOB_ROOM_NUM, ORTE_ATTR_GLOBAL, &trk->index, OPAL_INT);
 815 
 816     
 817     
 818     if (orte_cmd_options.tag_output) {
 819         orte_set_attribute(&jdata->attributes, ORTE_JOB_TAG_OUTPUT, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
 820     }
 821     
 822     if (orte_cmd_options.timestamp_output) {
 823         orte_set_attribute(&jdata->attributes, ORTE_JOB_TIMESTAMP_OUTPUT, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
 824     }
 825     
 826     if (NULL != orte_cmd_options.output_filename) {
 827         
 828 
 829 
 830 
 831         if (!opal_path_is_absolute(orte_cmd_options.output_filename)) {
 832             char cwd[OPAL_PATH_MAX], *path;
 833             getcwd(cwd, sizeof(cwd));
 834             path = opal_os_path(false, cwd, orte_cmd_options.output_filename, NULL);
 835             orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_FILE, ORTE_ATTR_GLOBAL, path, OPAL_STRING);
 836             free(path);
 837         } else {
 838             orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_FILE, ORTE_ATTR_GLOBAL, orte_cmd_options.output_filename, OPAL_STRING);
 839         }
 840     }
 841     
 842     if (orte_cmd_options.merge) {
 843         orte_set_attribute(&jdata->attributes, ORTE_JOB_MERGE_STDERR_STDOUT, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
 844     }
 845 
 846     
 847     if (NULL != orte_cmd_options.stdin_target) {
 848         if (0 == strcmp(orte_cmd_options.stdin_target, "all")) {
 849             jdata->stdin_target = ORTE_VPID_WILDCARD;
 850         } else if (0 == strcmp(orte_cmd_options.stdin_target, "none")) {
 851             jdata->stdin_target = ORTE_VPID_INVALID;
 852         } else {
 853             jdata->stdin_target = strtoul(orte_cmd_options.stdin_target, NULL, 10);
 854         }
 855     }
 856 
 857     
 858     if (orte_cmd_options.index_argv) {
 859         orte_set_attribute(&jdata->attributes, ORTE_JOB_INDEX_ARGV, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
 860     }
 861 
 862     
 863     parse_locals(jdata, argc, argv);
 864 
 865     if (0 == jdata->num_apps) {
 866         
 867 
 868         orte_show_help("help-orterun.txt", "orterun:nothing-to-do",
 869                        true, orte_basename);
 870         ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
 871         return ORTE_ERR_FATAL;
 872     }
 873 
 874     
 875     jdata->map = OBJ_NEW(orte_job_map_t);
 876 
 877     if (NULL != orte_cmd_options.mapping_policy) {
 878         if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(jdata, &jdata->map->mapping, NULL, orte_cmd_options.mapping_policy))) {
 879             ORTE_ERROR_LOG(rc);
 880             return rc;
 881         }
 882     } else if (orte_cmd_options.pernode) {
 883         ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_PPR);
 884         ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN);
 885         
 886         jdata->map->ppr = strdup("1:node");
 887     } else if (0 < orte_cmd_options.npernode) {
 888         ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_PPR);
 889         ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN);
 890         
 891         opal_asprintf(&jdata->map->ppr, "%d:node", orte_cmd_options.npernode);
 892     } else if (0 < orte_cmd_options.npersocket) {
 893         ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_PPR);
 894         ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN);
 895         
 896         opal_asprintf(&jdata->map->ppr, "%d:socket", orte_cmd_options.npersocket);
 897     }
 898 
 899 
 900     
 901     if (0 < orte_cmd_options.cpus_per_proc) {
 902         jdata->map->cpus_per_rank = orte_cmd_options.cpus_per_proc;
 903     }
 904     
 905     if (NULL != orte_cmd_options.ranking_policy) {
 906         if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_ranking_policy(&jdata->map->ranking,
 907                                                                      jdata->map->mapping,
 908                                                                      orte_cmd_options.ranking_policy))) {
 909             ORTE_ERROR_LOG(rc);
 910             return rc;
 911         }
 912     }
 913     
 914     if (NULL != orte_cmd_options.binding_policy) {
 915         if (ORTE_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&jdata->map->binding,
 916                                                                      orte_cmd_options.binding_policy))) {
 917             ORTE_ERROR_LOG(rc);
 918             return rc;
 919         }
 920     }
 921 
 922     
 923     if (orte_cmd_options.nolocal) {
 924         ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL);
 925     }
 926     if (orte_cmd_options.no_oversubscribe) {
 927         ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
 928     }
 929     if (orte_cmd_options.oversubscribe) {
 930         ORTE_UNSET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
 931         ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
 932     }
 933     if (orte_cmd_options.report_bindings) {
 934         orte_set_attribute(&jdata->attributes, ORTE_JOB_REPORT_BINDINGS, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
 935     }
 936     if (orte_cmd_options.cpu_list) {
 937         orte_set_attribute(&jdata->attributes, ORTE_JOB_CPU_LIST, ORTE_ATTR_GLOBAL, orte_cmd_options.cpu_list, OPAL_STRING);
 938     }
 939 
 940     
 941     if (orte_enable_recovery) {
 942         ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_RECOVERABLE);
 943         if (0 == orte_max_restarts) {
 944             
 945             orte_set_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
 946         }
 947     }
 948     
 949     if (0 < orte_max_restarts) {
 950         for (i=0; i < jdata->num_apps; i++) {
 951             if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
 952                 orte_set_attribute(&app->attributes, ORTE_APP_MAX_RESTARTS, ORTE_ATTR_GLOBAL, &orte_max_restarts, OPAL_INT32);
 953             }
 954         }
 955     }
 956     
 957     if (orte_cmd_options.continuous) {
 958         
 959         orte_set_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
 960     }
 961 
 962     
 963     if (NULL != getenv("ORTE_TEST_DEBUGGER_ATTACH")) {
 964         char *evar;
 965         evar = getenv("ORTE_TEST_DEBUGGER_SLEEP");
 966         for (n=0; n < (int)jdata->num_apps; n++) {
 967             if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
 968                 opal_setenv("ORTE_TEST_DEBUGGER_ATTACH", "1", true, &app->env);
 969                 if (NULL != evar) {
 970                     opal_setenv("ORTE_TEST_DEBUGGER_SLEEP", evar, true, &app->env);
 971                 }
 972             }
 973         }
 974     }
 975 
 976     
 977     if (NULL != getenv("ORTE_TEST_HNP_SUICIDE") ||
 978         NULL != getenv("ORTE_TEST_ORTED_SUICIDE")) {
 979         
 980 
 981         ORTE_FLAG_UNSET(jdata, ORTE_JOB_FLAG_FORWARD_OUTPUT);
 982     }
 983 
 984     
 985 
 986 
 987     param = NULL;
 988     if (0 < orte_cmd_options.timeout ||
 989         NULL != (param = getenv("MPIEXEC_TIMEOUT"))) {
 990         if (NULL != param) {
 991             timeout_seconds = strtol(param, NULL, 10);
 992             
 993             if (0 < orte_cmd_options.timeout && timeout_seconds != orte_cmd_options.timeout) {
 994                 orte_show_help("help-orterun.txt", "orterun:timeoutconflict", false,
 995                                orte_basename, orte_cmd_options.timeout, param);
 996                 exit(1);
 997             }
 998         } else {
 999             timeout_seconds = orte_cmd_options.timeout;
1000         }
1001         if (NULL == (orte_mpiexec_timeout = OBJ_NEW(orte_timer_t))) {
1002             ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
1003             ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_OUT_OF_RESOURCE);
1004             
1005         }
1006         orte_mpiexec_timeout->tv.tv_sec = timeout_seconds;
1007         orte_mpiexec_timeout->tv.tv_usec = 0;
1008         opal_event_evtimer_set(orte_event_base, orte_mpiexec_timeout->ev,
1009                                orte_timeout_wakeup, jdata);
1010         opal_event_set_priority(orte_mpiexec_timeout->ev, ORTE_ERROR_PRI);
1011         opal_event_evtimer_add(orte_mpiexec_timeout->ev, &orte_mpiexec_timeout->tv);
1012     }
1013 
1014     
1015     if (NULL != (param = getenv("OMPI_MEMPROFILE"))) {
1016         timeout_seconds = strtol(param, NULL, 10);
1017         if (NULL == (orte_memprofile_timeout = OBJ_NEW(orte_timer_t))) {
1018             ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
1019             ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_OUT_OF_RESOURCE);
1020             
1021         }
1022         orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MEMPROFILE,
1023                                 ORTE_RML_PERSISTENT, profile_recv, NULL);
1024         orte_memprofile_timeout->tv.tv_sec = timeout_seconds;
1025         orte_memprofile_timeout->tv.tv_usec = 0;
1026         opal_event_evtimer_set(orte_event_base, orte_memprofile_timeout->ev,
1027                                orte_profile_wakeup, jdata);
1028         opal_event_set_priority(orte_memprofile_timeout->ev, ORTE_ERROR_PRI);
1029         opal_event_evtimer_add(orte_memprofile_timeout->ev, &orte_memprofile_timeout->tv);
1030     }
1031     if (ORTE_PROC_IS_HNP) {
1032         
1033         daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
1034 
1035         
1036         if (NULL != orte_cmd_options.report_uri) {
1037             FILE *fp;
1038             char *rml_uri;
1039             orte_oob_base_get_addr(&rml_uri);
1040             if (0 == strcmp(orte_cmd_options.report_uri, "-")) {
1041                 
1042                 printf("%s\n",  (NULL == rml_uri) ? "NULL" : rml_uri);
1043             } else if (0 == strcmp(orte_cmd_options.report_uri, "+")) {
1044                 
1045                 fprintf(stderr, "%s\n",  (NULL == rml_uri) ? "NULL" : rml_uri);
1046             } else {
1047                 fp = fopen(orte_cmd_options.report_uri, "w");
1048                 if (NULL == fp) {
1049                     orte_show_help("help-orterun.txt", "orterun:write_file", false,
1050                                    orte_basename, "uri", orte_cmd_options.report_uri);
1051                     exit(1);
1052                 }
1053                 fprintf(fp, "%s\n", (NULL == rml_uri) ? "NULL" : rml_uri);
1054                 fclose(fp);
1055             }
1056             if (NULL != rml_uri) {
1057                 free(rml_uri);
1058             }
1059         }
1060         
1061 
1062 
1063 
1064 
1065 
1066 
1067 
1068 
1069         param = NULL;
1070         if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0)) &&
1071             orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)¶m, OPAL_STRING)) {
1072             char *oldenv, *newenv, *lib_base, *bin_base;
1073 
1074             
1075 
1076 
1077             if (NULL == (dapp = (orte_app_context_t*)opal_pointer_array_get_item(daemons->apps, 0))) {
1078                 
1079                 ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
1080                 return ORTE_ERR_NOT_FOUND;
1081             }
1082             orte_set_attribute(&dapp->attributes, ORTE_APP_PREFIX_DIR, ORTE_ATTR_LOCAL, param, OPAL_STRING);
1083 
1084             lib_base = opal_basename(opal_install_dirs.libdir);
1085             bin_base = opal_basename(opal_install_dirs.bindir);
1086 
1087             
1088             newenv = opal_os_path( false, param, bin_base, NULL );
1089             oldenv = getenv("PATH");
1090             if (NULL != oldenv) {
1091                 char *temp;
1092                 opal_asprintf(&temp, "%s:%s", newenv, oldenv );
1093                 free( newenv );
1094                 newenv = temp;
1095             }
1096             opal_setenv("PATH", newenv, true, &orte_launch_environ);
1097             if (orte_debug_flag) {
1098                 opal_output(0, "%s: reset PATH: %s", orte_basename, newenv);
1099             }
1100             free(newenv);
1101             free(bin_base);
1102 
1103             
1104             newenv = opal_os_path( false, param, lib_base, NULL );
1105             oldenv = getenv("LD_LIBRARY_PATH");
1106             if (NULL != oldenv) {
1107                 char* temp;
1108                 opal_asprintf(&temp, "%s:%s", newenv, oldenv);
1109                 free(newenv);
1110                 newenv = temp;
1111             }
1112             opal_setenv("LD_LIBRARY_PATH", newenv, true, &orte_launch_environ);
1113             if (orte_debug_flag) {
1114                 opal_output(0, "%s: reset LD_LIBRARY_PATH: %s",
1115                             orte_basename, newenv);
1116             }
1117             free(newenv);
1118             free(lib_base);
1119             free(param);
1120         }
1121 
1122         
1123         orte_debugger_init_before_spawn(jdata);
1124 
1125         rc = orte_plm.spawn(jdata);
1126     } else {
1127         
1128         orte_set_attribute(&jdata->attributes, ORTE_JOB_DVM_JOB, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
1129         
1130 
1131 
1132         orte_set_attribute(&jdata->attributes, ORTE_JOB_FIXED_DVM, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
1133         
1134         req = OBJ_NEW(opal_buffer_t);
1135         if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &cmd, 1, ORTE_DAEMON_CMD))) {
1136             ORTE_ERROR_LOG(rc);
1137             return rc;
1138         }
1139         if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &jdata, 1, ORTE_JOB))) {
1140             ORTE_ERROR_LOG(rc);
1141             return rc;
1142         }
1143         if (OPAL_SUCCESS != (rc = opal_dss.pack(req, &trk->index, 1, OPAL_INT))) {
1144             ORTE_ERROR_LOG(rc);
1145             return rc;
1146         }
1147         orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON,
1148                                 orte_rml_send_callback, NULL);
1149 
1150         
1151         if (NULL != index) {
1152             *index = trk->index;
1153         }
1154     }
1155 
1156     return ORTE_SUCCESS;
1157 
1158 }
1159 
1160 
1161 static int init_globals(void)
1162 {
1163     
1164     orte_cmd_options.help = NULL;
1165     orte_cmd_options.version = false;
1166     orte_cmd_options.num_procs =  0;
1167     if (NULL != orte_cmd_options.appfile) {
1168         free(orte_cmd_options.appfile);
1169         orte_cmd_options.appfile = NULL;
1170     }
1171     if (NULL != orte_cmd_options.wdir) {
1172         free(orte_cmd_options.wdir);
1173         orte_cmd_options.wdir = NULL;
1174     }
1175     orte_cmd_options.set_cwd_to_session_dir = false;
1176     if (NULL != orte_cmd_options.path) {
1177         free(orte_cmd_options.path);
1178         orte_cmd_options.path = NULL;
1179     }
1180     if (NULL != orte_cmd_options.hnp) {
1181         free(orte_cmd_options.hnp);
1182         orte_cmd_options.hnp = NULL;
1183     }
1184     if (NULL != orte_cmd_options.stdin_target) {
1185         free(orte_cmd_options.stdin_target);
1186         orte_cmd_options.stdin_target = NULL ;
1187     }
1188     if (NULL != orte_cmd_options.output_filename) {
1189         free(orte_cmd_options.output_filename);
1190         orte_cmd_options.output_filename = NULL ;
1191     }
1192     if (NULL != orte_cmd_options.binding_policy) {
1193         free(orte_cmd_options.binding_policy);
1194         orte_cmd_options.binding_policy = NULL;
1195     }
1196     if (NULL != orte_cmd_options.mapping_policy) {
1197         free(orte_cmd_options.mapping_policy);
1198         orte_cmd_options.mapping_policy = NULL;
1199     }
1200     if (NULL != orte_cmd_options.ranking_policy) {
1201         free(orte_cmd_options.ranking_policy);
1202         orte_cmd_options.ranking_policy = NULL;
1203     }
1204 
1205     if (NULL != orte_cmd_options.report_pid) {
1206         free(orte_cmd_options.report_pid);
1207         orte_cmd_options.report_pid = NULL;
1208     }
1209     if (NULL != orte_cmd_options.report_uri) {
1210         free(orte_cmd_options.report_uri);
1211         orte_cmd_options.report_uri = NULL;
1212     }
1213     if (NULL != orte_cmd_options.cpu_list) {
1214         free(orte_cmd_options.cpu_list);
1215         orte_cmd_options.cpu_list= NULL;
1216     }
1217     orte_cmd_options.preload_binaries = false;
1218     if (NULL != orte_cmd_options.preload_files) {
1219         free(orte_cmd_options.preload_files);
1220         orte_cmd_options.preload_files  = NULL;
1221     }
1222 
1223 
1224     
1225     return ORTE_SUCCESS;
1226 }
1227 
1228 
1229 static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line)
1230 {
1231     
1232     if (NULL != orte_cmd_options.report_pid) {
1233         FILE *fp;
1234         if (0 == strcmp(orte_cmd_options.report_pid, "-")) {
1235             
1236             printf("%d\n", (int)getpid());
1237         } else if (0 == strcmp(orte_cmd_options.report_pid, "+")) {
1238             
1239             fprintf(stderr, "%d\n", (int)getpid());
1240         } else {
1241             fp = fopen(orte_cmd_options.report_pid, "w");
1242             if (NULL == fp) {
1243                 orte_show_help("help-orterun.txt", "orterun:write_file", false,
1244                                orte_basename, "pid", orte_cmd_options.report_pid);
1245                 exit(0);
1246             }
1247             fprintf(fp, "%d\n", (int)getpid());
1248             fclose(fp);
1249         }
1250     }
1251 
1252     
1253 
1254     if (orte_cmd_options.debugger) {
1255         run_debugger(orte_basename, cmd_line, argc, argv, orte_cmd_options.num_procs);
1256     }
1257 
1258     return ORTE_SUCCESS;
1259 }
1260 
1261 
1262 static int parse_locals(orte_job_t *jdata, int argc, char* argv[])
1263 {
1264     int i, rc, app_num;
1265     int temp_argc;
1266     char **temp_argv, **env;
1267     orte_app_context_t *app;
1268     bool made_app;
1269     orte_std_cntr_t j, size1;
1270 
1271     
1272     temp_argc = 0;
1273     temp_argv = NULL;
1274     opal_argv_append(&temp_argc, &temp_argv, argv[0]);
1275 
1276     
1277 
1278 
1279 
1280     env = NULL;
1281     for (app_num = 0, i = 1; i < argc; ++i) {
1282         if (0 == strcmp(argv[i], ":")) {
1283             
1284             if (opal_argv_count(temp_argv) > 1) {
1285                 if (NULL != env) {
1286                     opal_argv_free(env);
1287                     env = NULL;
1288                 }
1289                 app = NULL;
1290                 rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env);
1291                 
1292                 if (ORTE_SUCCESS != rc) {
1293                     
1294 
1295 
1296                     exit(1);
1297                 }
1298                 if (made_app) {
1299                     app->idx = app_num;
1300                     ++app_num;
1301                     opal_pointer_array_add(jdata->apps, app);
1302                     ++jdata->num_apps;
1303                 }
1304 
1305                 
1306 
1307                 temp_argc = 0;
1308                 temp_argv = NULL;
1309                 opal_argv_append(&temp_argc, &temp_argv, argv[0]);
1310             }
1311         } else {
1312             opal_argv_append(&temp_argc, &temp_argv, argv[i]);
1313         }
1314     }
1315 
1316     if (opal_argv_count(temp_argv) > 1) {
1317         app = NULL;
1318         rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env);
1319         if (ORTE_SUCCESS != rc) {
1320             
1321 
1322             exit(1);
1323         }
1324         if (made_app) {
1325             app->idx = app_num;
1326             ++app_num;
1327             opal_pointer_array_add(jdata->apps, app);
1328             ++jdata->num_apps;
1329         }
1330     }
1331     if (NULL != env) {
1332         opal_argv_free(env);
1333     }
1334     opal_argv_free(temp_argv);
1335 
1336    
1337 
1338 
1339 
1340     if (NULL != global_mca_env) {
1341         size1 = (size_t)opal_pointer_array_get_size(jdata->apps);
1342         
1343         for (j = 0; j < size1; ++j) {
1344             app = (orte_app_context_t *)
1345                 opal_pointer_array_get_item(jdata->apps, j);
1346             if (NULL != app) {
1347                 
1348                 env = opal_environ_merge(global_mca_env, app->env);
1349                 opal_argv_free(app->env);
1350                 app->env = env;
1351             }
1352         }
1353     }
1354 
1355     
1356 
1357 
1358 
1359 
1360 
1361 
1362 
1363 
1364 
1365 
1366     env = NULL;
1367     if (NULL != global_mca_env) {
1368         env = global_mca_env;
1369     } else {
1370         if (opal_pointer_array_get_size(jdata->apps) >= 1) {
1371             
1372 
1373 
1374             app = (orte_app_context_t *)
1375                 opal_pointer_array_get_item(jdata->apps, 0);
1376             if (NULL != app) {
1377                 env = app->env;
1378                 for (j = 1; j < opal_pointer_array_get_size(jdata->apps); ++j) {
1379                     if (NULL != opal_pointer_array_get_item(jdata->apps, j)) {
1380                         env = NULL;
1381                         break;
1382                     }
1383                 }
1384             }
1385         }
1386     }
1387 
1388     if (NULL != env) {
1389         size1 = opal_argv_count(env);
1390         for (j = 0; j < size1; ++j) {
1391             
1392 
1393 
1394 
1395 
1396 
1397 
1398             char *value, *s = strdup(env[j]);
1399 
1400             if (NULL == s) {
1401                 return OPAL_ERR_OUT_OF_RESOURCE;
1402             }
1403 
1404             value = strchr(s, '=');
1405             if (NULL != value) {
1406                 value++;
1407             }
1408             opal_setenv(s, value, true, &environ);
1409             free(s);
1410         }
1411     }
1412 
1413     
1414 
1415     return ORTE_SUCCESS;
1416 }
1417 
1418 
1419 
1420 
1421 
1422 
1423 
1424 
1425 
1426 
1427 
1428 
1429 
1430 
1431 
1432 
1433 
1434 
1435 
1436 
1437 
1438 
1439 
1440 static int create_app(int argc, char* argv[],
1441                       orte_job_t *jdata,
1442                       orte_app_context_t **app_ptr,
1443                       bool *made_app, char ***app_env)
1444 {
1445     char cwd[OPAL_PATH_MAX];
1446     int i, j, count, rc;
1447     char *param, *value;
1448     orte_app_context_t *app = NULL;
1449     bool found = false;
1450     char *appname = NULL;
1451 
1452     *made_app = false;
1453 
1454     
1455 
1456 
1457 
1458 
1459 
1460 
1461 
1462 
1463 
1464     if (NULL != orte_cmd_options.appfile) {
1465         if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(argc, 0, argv))) {
1466             goto cleanup;
1467         }
1468     }
1469 
1470     
1471     init_globals();
1472     
1473 
1474     if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(orte_cmd_line, true, false,
1475                                                   argc, argv)) ) {
1476         if (OPAL_ERR_SILENT != rc) {
1477             fprintf(stderr, "%s: command line error (%s)\n", argv[0],
1478                     opal_strerror(rc));
1479         }
1480         return rc;
1481     }
1482 
1483     
1484     if (NULL != orte_cmd_options.appfile) {
1485         return parse_appfile(jdata, strdup(orte_cmd_options.appfile), app_env);
1486     }
1487 
1488     
1489     app = OBJ_NEW(orte_app_context_t);
1490     opal_cmd_line_get_tail(orte_cmd_line, &count, &app->argv);
1491 
1492     
1493     if (0 == count) {
1494         orte_show_help("help-orterun.txt", "orterun:executable-not-specified",
1495                        true, orte_basename, orte_basename);
1496         rc = ORTE_ERR_NOT_FOUND;
1497         goto cleanup;
1498     }
1499 
1500     
1501 
1502 
1503 
1504 
1505 
1506 
1507     if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(argc, count, argv))) {
1508         goto cleanup;
1509     }
1510 
1511     
1512 
1513     app->env = opal_argv_copy(*app_env);
1514     if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(orte_cmd_options.path,
1515                                                     orte_cmd_line,
1516                                                     environ, &app->env))) {
1517         goto cleanup;
1518     }
1519 
1520 
1521     
1522 
1523     if (NULL != orte_cmd_options.wdir) {
1524         
1525         if (opal_path_is_absolute(orte_cmd_options.wdir)) {
1526             app->cwd = strdup(orte_cmd_options.wdir);
1527         } else {
1528             
1529             if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
1530                 orte_show_help("help-orterun.txt", "orterun:init-failure",
1531                                true, "get the cwd", rc);
1532                 goto cleanup;
1533             }
1534             
1535             app->cwd = opal_os_path(false, cwd, orte_cmd_options.wdir, NULL);
1536         }
1537         orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
1538     } else if (orte_cmd_options.set_cwd_to_session_dir) {
1539         orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
1540         orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
1541     } else {
1542         if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
1543             orte_show_help("help-orterun.txt", "orterun:init-failure",
1544                            true, "get the cwd", rc);
1545             goto cleanup;
1546         }
1547         app->cwd = strdup(cwd);
1548     }
1549 
1550     
1551 
1552 
1553 
1554 
1555     if (0 == total_num_apps) {
1556         
1557 
1558 
1559         if (opal_cmd_line_is_taken(orte_cmd_line, "noprefix")) {
1560             want_prefix_by_default = false;
1561         }
1562 
1563         
1564         if (opal_cmd_line_is_taken(orte_cmd_line, "prefix") || want_prefix_by_default) {
1565             size_t param_len;
1566             
1567 
1568 
1569             if (opal_cmd_line_is_taken(orte_cmd_line, "prefix") &&
1570                 NULL != orte_cmd_options.prefix) {
1571                 
1572                 param = strdup(opal_cmd_line_get_param(orte_cmd_line, "prefix", 0, 0));
1573                 
1574                 if (0 == strcmp(OPAL_PATH_SEP, &(param[strlen(param)-1]))) {
1575                     param[strlen(param)-1] = '\0';
1576                 }
1577                 value = strdup(orte_cmd_options.prefix);
1578                 if (0 == strcmp(OPAL_PATH_SEP, &(value[strlen(value)-1]))) {
1579                     value[strlen(value)-1] = '\0';
1580                 }
1581                 if (0 != strcmp(param, value)) {
1582                     orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict",
1583                                    true, orte_basename, value, param);
1584                     
1585 
1586 
1587                     free(param);
1588                     param = strdup(orte_cmd_options.prefix);
1589                 }
1590                 free(value);
1591             } else if (NULL != orte_cmd_options.prefix) {
1592                 param = strdup(orte_cmd_options.prefix);
1593             } else if (opal_cmd_line_is_taken(orte_cmd_line, "prefix")){
1594                 
1595                 param = strdup(opal_cmd_line_get_param(orte_cmd_line, "prefix", 0, 0));
1596             } else {
1597                 
1598                 param = strdup(opal_install_dirs.prefix);
1599             }
1600 
1601             if (NULL != param) {
1602                 
1603                 param_len = strlen(param);
1604                 while (0 == strcmp (OPAL_PATH_SEP, &(param[param_len-1]))) {
1605                     param[param_len-1] = '\0';
1606                     param_len--;
1607                     if (0 == param_len) {
1608                         orte_show_help("help-orterun.txt", "orterun:empty-prefix",
1609                                        true, orte_basename, orte_basename);
1610                         free(param);
1611                         return ORTE_ERR_FATAL;
1612                     }
1613                 }
1614                 orte_set_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, ORTE_ATTR_GLOBAL, param, OPAL_STRING);
1615                 free(param);
1616             }
1617         }
1618     }
1619 
1620     
1621     if (NULL != orte_cmd_options.pset) {
1622         orte_set_attribute(&app->attributes, ORTE_APP_PSET_NAME, ORTE_ATTR_GLOBAL,
1623                            orte_cmd_options.pset, OPAL_STRING);
1624     }
1625 
1626     
1627 
1628 
1629 
1630     if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "hostfile"))) {
1631         if(1 < j) {
1632             orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
1633                            true, orte_basename, NULL);
1634             return ORTE_ERR_FATAL;
1635         } else {
1636             value = opal_cmd_line_get_param(orte_cmd_line, "hostfile", 0, 0);
1637             orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_GLOBAL, value, OPAL_STRING);
1638         }
1639     }
1640     if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "machinefile"))) {
1641         if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) {
1642             orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
1643                            true, orte_basename, NULL);
1644             return ORTE_ERR_FATAL;
1645         } else {
1646             value = opal_cmd_line_get_param(orte_cmd_line, "machinefile", 0, 0);
1647             orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_GLOBAL, value, OPAL_STRING);
1648         }
1649     }
1650 
1651     
1652     if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "host"))) {
1653         char **targ=NULL, *tval;
1654         for (i = 0; i < j; ++i) {
1655             value = opal_cmd_line_get_param(orte_cmd_line, "host", i, 0);
1656             opal_argv_append_nosize(&targ, value);
1657         }
1658         tval = opal_argv_join(targ, ',');
1659         orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_GLOBAL, tval, OPAL_STRING);
1660         opal_argv_free(targ);
1661         free(tval);
1662     } else if (NULL != orte_default_dash_host) {
1663         orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL,
1664                            orte_default_dash_host, OPAL_STRING);
1665     }
1666 
1667     
1668     if (0 > orte_cmd_options.num_procs) {
1669         orte_show_help("help-orterun.txt", "orterun:negative-nprocs",
1670                        true, orte_basename, app->argv[0],
1671                        orte_cmd_options.num_procs, NULL);
1672         return ORTE_ERR_FATAL;
1673     }
1674 
1675     app->num_procs = (orte_std_cntr_t)orte_cmd_options.num_procs;
1676     total_num_apps++;
1677 
1678     
1679 
1680 
1681 
1682 
1683     if (NULL == strstr(app->argv[0], "java")) {
1684         if (orte_cmd_options.preload_binaries) {
1685             orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
1686             orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
1687             
1688             orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
1689         }
1690     }
1691     if (NULL != orte_cmd_options.preload_files) {
1692         orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_FILES, ORTE_ATTR_GLOBAL,
1693                            orte_cmd_options.preload_files, OPAL_STRING);
1694     }
1695 
1696     
1697 
1698 
1699 
1700 
1701     app->app = strdup(app->argv[0]);
1702     if (NULL == app->app) {
1703         orte_show_help("help-orterun.txt", "orterun:call-failed",
1704                        true, orte_basename, "library", "strdup returned NULL", errno);
1705         rc = ORTE_ERR_NOT_FOUND;
1706         goto cleanup;
1707     }
1708 
1709     
1710 
1711 
1712 
1713 
1714     appname = opal_basename(app->app);
1715     if (0 == strcmp(appname, "java")) {
1716         
1717         found = false;
1718         for (i=1; NULL != app->argv[i]; i++) {
1719             if (NULL != strstr(app->argv[i], "java.library.path")) {
1720                 char *dptr;
1721                 
1722                 if (NULL == (dptr = strchr(app->argv[i], '='))) {
1723                     
1724                     rc = ORTE_ERR_BAD_PARAM;
1725                     goto cleanup;
1726                 }
1727                 
1728                 ++dptr;
1729                 
1730                 found = true;
1731                 if (NULL == strstr(app->argv[i], opal_install_dirs.libdir)) {
1732                     
1733                     if (':' == app->argv[i][strlen(app->argv[i]-1)]) {
1734                         opal_asprintf(&value, "-Djava.library.path=%s%s", dptr, opal_install_dirs.libdir);
1735                     } else {
1736                         opal_asprintf(&value, "-Djava.library.path=%s:%s", dptr, opal_install_dirs.libdir);
1737                     }
1738                     free(app->argv[i]);
1739                     app->argv[i] = value;
1740                 }
1741                 break;
1742             }
1743         }
1744         if (!found) {
1745             
1746             opal_asprintf(&value, "-Djava.library.path=%s", opal_install_dirs.libdir);
1747             opal_argv_insert_element(&app->argv, 1, value);
1748             free(value);
1749         }
1750 
1751         
1752         found = false;
1753         for (i=1; NULL != app->argv[i]; i++) {
1754             if (NULL != strstr(app->argv[i], "cp") ||
1755                 NULL != strstr(app->argv[i], "classpath")) {
1756                 
1757                 found = true;
1758                 
1759                 value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL);
1760                 if (access(value, F_OK ) != -1) {
1761                     set_classpath_jar_file(app, i+1, "mpi.jar");
1762                 }
1763                 free(value);
1764                 
1765                 value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL);
1766                 if (access(value, F_OK ) != -1) {
1767                     set_classpath_jar_file(app, i+1, "shmem.jar");
1768                 }
1769                 free(value);
1770                 
1771                 opal_asprintf(&value, "%s:%s", app->cwd, app->argv[i+1]);
1772                 free(app->argv[i+1]);
1773                 app->argv[i+1] = value;
1774                 break;
1775             }
1776         }
1777         if (!found) {
1778             
1779             found = false;  
1780             for (i=0; NULL != environ[i]; i++) {
1781                 if (0 == strncmp(environ[i], "CLASSPATH", strlen("CLASSPATH"))) {
1782                     value = strchr(environ[i], '=');
1783                     ++value; 
1784                     opal_argv_insert_element(&app->argv, 1, value);
1785                     
1786                     value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL);
1787                     if (access(value, F_OK ) != -1) {
1788                         set_classpath_jar_file(app, 1, "mpi.jar");
1789                     }
1790                     free(value);
1791                     
1792                     value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL);
1793                     if (access(value, F_OK ) != -1) {
1794                         set_classpath_jar_file(app, 1, "shmem.jar");
1795                     }
1796                     free(value);
1797                     
1798                     opal_asprintf(&value, "%s:%s", app->cwd, app->argv[1]);
1799                     free(app->argv[1]);
1800                     app->argv[1] = value;
1801                     opal_argv_insert_element(&app->argv, 1, "-cp");
1802                     found = true;
1803                     break;
1804                 }
1805             }
1806             if (!found) {
1807                 
1808 
1809 
1810 
1811                 char *str, *str2;
1812                 
1813                 str = strdup(app->cwd);
1814                 
1815                 value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL);
1816                 if (access(value, F_OK ) != -1) {
1817                     opal_asprintf(&str2, "%s:%s", str, value);
1818                     free(str);
1819                     str = str2;
1820                 }
1821                 free(value);
1822                 
1823                 value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL);
1824                 if (access(value, F_OK ) != -1) {
1825                     opal_asprintf(&str2, "%s:%s", str, value);
1826                     free(str);
1827                     str = str2;
1828                 }
1829                 free(value);
1830                 opal_argv_insert_element(&app->argv, 1, str);
1831                 free(str);
1832                 opal_argv_insert_element(&app->argv, 1, "-cp");
1833             }
1834         }
1835         
1836         for (i=1; i < opal_argv_count(app->argv); i++) {
1837             if (NULL != strstr(app->argv[i], "java.library.path")) {
1838                 continue;
1839             } else if (NULL != strstr(app->argv[i], "cp") ||
1840                        NULL != strstr(app->argv[i], "classpath")) {
1841                 
1842                 i++;
1843                 continue;
1844             }
1845             
1846             opal_setenv("OMPI_COMMAND", app->argv[i], true, &app->env);
1847             
1848             if ((i+1) < opal_argv_count(app->argv)) {
1849                 value = opal_argv_join(&app->argv[i+1], ' ');
1850                 opal_setenv("OMPI_ARGV", value, true, &app->env);
1851                 free(value);
1852             }
1853             break;
1854         }
1855     } else {
1856         
1857         opal_setenv("OMPI_COMMAND", appname, true, &app->env);
1858         if (1 < opal_argv_count(app->argv)) {
1859             value = opal_argv_join(&app->argv[1], ' ');
1860             opal_setenv("OMPI_ARGV", value, true, &app->env);
1861             free(value);
1862         }
1863     }
1864 
1865     *app_ptr = app;
1866     app = NULL;
1867     *made_app = true;
1868 
1869     
1870 
1871  cleanup:
1872     if (NULL != app) {
1873         OBJ_RELEASE(app);
1874     }
1875     if (NULL != appname) {
1876         free(appname);
1877     }
1878     return rc;
1879 }
1880 
1881 static void set_classpath_jar_file(orte_app_context_t *app, int index, char *jarfile)
1882 {
1883     if (NULL == strstr(app->argv[index], jarfile)) {
1884         
1885         char *fmt = ':' == app->argv[index][strlen(app->argv[index]-1)]
1886                     ? "%s%s/%s" : "%s:%s/%s";
1887         char *str;
1888         opal_asprintf(&str, fmt, app->argv[index], opal_install_dirs.libdir, jarfile);
1889         free(app->argv[index]);
1890         app->argv[index] = str;
1891     }
1892 }
1893 
1894 static int parse_appfile(orte_job_t *jdata, char *filename, char ***env)
1895 {
1896     size_t i, len;
1897     FILE *fp;
1898     char line[BUFSIZ];
1899     int rc, argc, app_num;
1900     char **argv;
1901     orte_app_context_t *app;
1902     bool blank, made_app;
1903     char bogus[] = "bogus ";
1904     char **tmp_env;
1905 
1906     
1907 
1908 
1909 
1910     if (NULL != orte_cmd_options.appfile) {
1911         free(orte_cmd_options.appfile);
1912         orte_cmd_options.appfile = NULL;
1913     }
1914 
1915     
1916 
1917     fp = fopen(filename, "r");
1918     if (NULL == fp) {
1919         orte_show_help("help-orterun.txt", "orterun:appfile-not-found", true,
1920                        filename);
1921         return ORTE_ERR_NOT_FOUND;
1922     }
1923 
1924     
1925 
1926     line[sizeof(line) - 1] = '\0';
1927     app_num = 0;
1928     do {
1929 
1930         
1931 
1932 
1933 
1934 
1935 
1936         line[0] = '\0';
1937         strcat(line, bogus);
1938 
1939         if (NULL == fgets(line + sizeof(bogus) - 1,
1940                           sizeof(line) - sizeof(bogus) - 1, fp)) {
1941             break;
1942         }
1943 
1944         
1945 
1946         len = strlen(line);
1947         if (len > 0 && '\n' == line[len - 1]) {
1948             line[len - 1] = '\0';
1949             if (len > 0) {
1950                 --len;
1951             }
1952         }
1953 
1954         
1955 
1956         for (i = 0; i < len; ++i) {
1957             if ('#' == line[i]) {
1958                 line[i] = '\0';
1959                 break;
1960             } else if (i + 1 < len && '/' == line[i] && '/' == line[i + 1]) {
1961                 line[i] = '\0';
1962                 break;
1963             }
1964         }
1965 
1966         
1967 
1968         len = strlen(line);
1969         for (blank = true, i = sizeof(bogus); i < len; ++i) {
1970             if (!isspace(line[i])) {
1971                 blank = false;
1972                 break;
1973             }
1974         }
1975         if (blank) {
1976             continue;
1977         }
1978 
1979         
1980 
1981         argv = opal_argv_split(line, ' ');
1982         argc = opal_argv_count(argv);
1983         if (argc > 0) {
1984 
1985             
1986 
1987 
1988 
1989 
1990 
1991 
1992 
1993 
1994 
1995 
1996 
1997             if (NULL != *env) {
1998                 tmp_env = opal_argv_copy(*env);
1999                 if (NULL == tmp_env) {
2000                     fclose(fp);
2001                     opal_argv_free(argv);
2002                     return ORTE_ERR_OUT_OF_RESOURCE;
2003                 }
2004             } else {
2005                 tmp_env = NULL;
2006             }
2007 
2008             rc = create_app(argc, argv, jdata, &app, &made_app, &tmp_env);
2009             if (ORTE_SUCCESS != rc) {
2010                 
2011 
2012                 exit(1);
2013             }
2014             if (NULL != tmp_env) {
2015                 opal_argv_free(tmp_env);
2016             }
2017             if (made_app) {
2018                 app->idx = app_num;
2019                 ++app_num;
2020                 opal_pointer_array_add(jdata->apps, app);
2021                 ++jdata->num_apps;
2022             }
2023         }
2024         opal_argv_free(argv);
2025     } while (!feof(fp));
2026     fclose(fp);
2027 
2028     
2029 
2030     free(filename);
2031 
2032     return ORTE_SUCCESS;
2033 }
2034 
2035 static void launch_recv(int status, orte_process_name_t* sender,
2036                         opal_buffer_t *buffer,
2037                         orte_rml_tag_t tag, void *cbdata)
2038 {
2039     int rc;
2040     int32_t ret;
2041     int32_t cnt;
2042     orte_jobid_t jobid;
2043     orte_app_context_t *app;
2044     orte_proc_t *proc;
2045     orte_node_t *node;
2046     int tool_job_index;
2047     trackr_t *trk;
2048 
2049     
2050     cnt = 1;
2051     if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT32))) {
2052         ORTE_ERROR_LOG(rc);
2053         ORTE_UPDATE_EXIT_STATUS(rc);
2054         return;
2055     }
2056     
2057     ORTE_UPDATE_EXIT_STATUS(ret);
2058 
2059     
2060     cnt = 1;
2061     if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &jobid, &cnt, ORTE_JOBID))) {
2062         ORTE_ERROR_LOG(rc);
2063         ORTE_UPDATE_EXIT_STATUS(rc);
2064         return;
2065     }
2066 
2067     
2068     cnt = 1;
2069     if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &tool_job_index, &cnt, OPAL_INT))) {
2070         ORTE_ERROR_LOG(rc);
2071         ORTE_UPDATE_EXIT_STATUS(rc);
2072         return;
2073     }
2074 
2075     
2076     if (NULL == (trk = (trackr_t*)opal_pointer_array_get_item(&tool_jobs, tool_job_index))) {
2077         opal_output(0, "SPAWN OF TRACKER ID %d RETURNED INDEX TO NULL OBJECT", tool_job_index);
2078         return;
2079     }
2080     trk->jdata->jobid = jobid;
2081 
2082     if (ORTE_SUCCESS == ret) {
2083         printf("[ORTE] Task: %d is launched! (Job ID: %s)\n", tool_job_index, ORTE_JOBID_PRINT(jobid));
2084     } else {
2085         
2086         cnt = 1;
2087         if (OPAL_SUCCESS == opal_dss.unpack(buffer, &trk->jdata->state, &cnt, ORTE_JOB_STATE_T)) {
2088             cnt = 1;
2089             opal_dss.unpack(buffer, &proc, &cnt, ORTE_PROC);
2090             proc->exit_code = ret;
2091             app = (orte_app_context_t*)opal_pointer_array_get_item(trk->jdata->apps, proc->app_idx);
2092             cnt = 1;
2093             opal_dss.unpack(buffer, &node, &cnt, ORTE_NODE);
2094             orte_print_aborted_job(trk->jdata, app, proc, node);
2095         }
2096     }
2097 
2098     
2099     if (NULL != trk->launch_cb) {
2100         trk->launch_cb(tool_job_index, trk->jdata, ret, trk->launch_cbdata);
2101     }
2102 
2103     
2104     if (ORTE_SUCCESS != ret) {
2105         opal_pointer_array_set_item(&tool_jobs, tool_job_index, NULL);
2106         OBJ_RELEASE(trk);
2107     }
2108 }
2109 
2110 static void complete_recv(int status, orte_process_name_t* sender,
2111                           opal_buffer_t *buffer,
2112                           orte_rml_tag_t tag, void *cbdata)
2113 {
2114     int rc, ret;
2115     int32_t cnt;
2116     orte_jobid_t jobid;
2117     orte_app_context_t *app;
2118     orte_proc_t *proc;
2119     orte_node_t *node;
2120     int tool_job_index;
2121     trackr_t *trk;
2122 
2123     
2124     cnt = 1;
2125     if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT))) {
2126         ORTE_ERROR_LOG(rc);
2127         ORTE_UPDATE_EXIT_STATUS(rc);
2128         return;
2129     }
2130 
2131     
2132     cnt = 1;
2133     if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &jobid, &cnt, ORTE_JOBID))) {
2134         ORTE_ERROR_LOG(rc);
2135         ORTE_UPDATE_EXIT_STATUS(rc);
2136         return;
2137     }
2138 
2139     
2140     cnt = 1;
2141     if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &tool_job_index, &cnt, OPAL_INT))) {
2142         ORTE_ERROR_LOG(rc);
2143         ORTE_UPDATE_EXIT_STATUS(rc);
2144         return;
2145     }
2146 
2147     
2148     if (NULL == (trk = (trackr_t*)opal_pointer_array_get_item(&tool_jobs, tool_job_index))) {
2149         opal_output(0, "TRACKER ID %d RETURNED INDEX TO NULL OBJECT", tool_job_index);
2150         return;
2151     }
2152 
2153     if (ORTE_SUCCESS == ret) {
2154         printf("[ORTE] Task: %d returned: %d (Job ID: %s)\n", tool_job_index, ret, ORTE_JOBID_PRINT(jobid));
2155     } else {
2156         
2157         cnt = 1;
2158         opal_dss.unpack(buffer, &trk->jdata->state, &cnt, ORTE_JOB_STATE_T);
2159         cnt = 1;
2160         opal_dss.unpack(buffer, &proc, &cnt, ORTE_PROC);
2161         proc->exit_code = ret;
2162         app = (orte_app_context_t*)opal_pointer_array_get_item(trk->jdata->apps, proc->app_idx);
2163         cnt = 1;
2164         opal_dss.unpack(buffer, &node, &cnt, ORTE_NODE);
2165         orte_print_aborted_job(trk->jdata, app, proc, node);
2166     }
2167 
2168     
2169     if (NULL != trk && NULL != trk->complete_cb) {
2170         trk->complete_cb(tool_job_index, trk->jdata, ret, trk->complete_cbdata);
2171     }
2172     
2173     opal_pointer_array_set_item(&tool_jobs, tool_job_index, NULL);
2174     OBJ_RELEASE(trk);
2175 }
2176 
2177 
2178 
2179 
2180 
2181 
2182 
2183 
2184 
2185 
2186 
2187 
2188 
2189 
2190 
2191 
2192 
2193 
2194 
2195 
2196 
2197 
2198 
2199 
2200 
2201 
2202 
2203 
2204 
2205 
2206 
2207 
2208 
2209 
2210 
2211 
2212 
2213 
2214 
2215 
2216 
2217 
2218 
2219 
2220 
2221 
2222 
2223 
2224 
2225 
2226 
2227 
2228 
2229 
2230 
2231 
2232 
2233 
2234 
2235 
2236 
2237 
2238 
2239 
2240 
2241 
2242 
2243 
2244 
2245 
2246 
2247 
/* Print "  <expression text> = <value>" for an int-valued expression X on
 * stderr.  Note that the expansion already ends in a ';'. */
#define DUMP_INT(X) fprintf(stderr, "  %s = %d\n", # X, X);
/* Permission bits (owner read/write, group read, others read) passed to
 * mkfifo() when the debugger attach FIFO is created. */
#define FILE_MODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)

/* One entry of the MPIR_proctable array that is filled in for the debugger.
 * NOTE(review): the layout is presumably fixed by the MPIR process
 * acquisition interface - do not reorder or retype the fields. */
struct MPIR_PROCDESC {
    char *host_name;        /* name (or alias) of the node the process runs on */
    char *executable_name;  /* path of the process's executable */
    int pid;                /* process id, copied from the proc object */
};
2256 
2257 
2258 
2259 
2260 
2261 
2262 
2263 
2264 static bool mpir_warning_printed = false;
2265 
2266 static void orte_debugger_init_before_spawn(orte_job_t *jdata)
2267 {
2268     char *env_name;
2269     orte_app_context_t *app;
2270     int i;
2271     char *attach_fifo;
2272 
2273     if (!MPIR_being_debugged && !orte_in_parallel_debugger) {
2274         
2275 
2276 
2277         if (NULL != orte_debugger_test_daemon && !orte_debugger_test_attach) {
2278             opal_output_verbose(2, orte_debug_output,
2279                                 "%s Debugger test daemon specified: %s",
2280                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2281                                 orte_debugger_test_daemon);
2282             goto launchit;
2283         }
2284         
2285 
2286 
2287         if (0 < orte_debugger_check_rate) {
2288             opal_output_verbose(2, orte_debug_output,
2289                                 "%s Setting debugger attach check rate for %d seconds",
2290                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2291                                 orte_debugger_check_rate);
2292             ORTE_TIMER_EVENT(orte_debugger_check_rate, 0, attach_debugger, ORTE_SYS_PRI);
2293         } else if (orte_create_session_dirs) {
2294             
2295 
2296 
2297             attach_fifo = opal_os_path(false, orte_process_info.job_session_dir,
2298                                        "debugger_attach_fifo", NULL);
2299             if ((mkfifo(attach_fifo, FILE_MODE) < 0) && errno != EEXIST) {
2300                 opal_output(0, "CANNOT CREATE FIFO %s: errno %d", attach_fifo, errno);
2301                 free(attach_fifo);
2302                 return;
2303             }
2304             opal_string_copy(MPIR_attach_fifo, attach_fifo,
2305                              MPIR_MAX_PATH_LENGTH);
2306             free(attach_fifo);
2307             open_fifo();
2308         }
2309         return;
2310     }
2311 
2312  launchit:
2313     opal_output_verbose(1, orte_debug_output, "Info: Spawned by a debugger");
2314 
2315     
2316     if (!mpir_warning_printed) {
2317         mpir_warning_printed = true;
2318         
2319         if (NULL == getenv("OMPI_MPIR_DO_NOT_WARN")) {
2320             orte_show_help("help-orted.txt", "mpir-debugger-detected", true);
2321         }
2322     }
2323 
2324     
2325     (void) mca_base_var_env_name ("orte_in_parallel_debugger", &env_name);
2326 
2327     for (i=0; i < jdata->apps->size; i++) {
2328         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
2329             continue;
2330         }
2331         opal_setenv(env_name, "1", true, &app->env);
2332     }
2333     free(env_name);
2334 
2335     
2336     if (orte_create_session_dirs) {
2337         
2338 
2339 
2340         attach_fifo = opal_os_path(false, orte_process_info.job_session_dir,
2341                                    "debugger_attach_fifo", NULL);
2342         if ((mkfifo(attach_fifo, FILE_MODE) < 0) && errno != EEXIST) {
2343             opal_output(0, "CANNOT CREATE FIFO %s: errno %d", attach_fifo, errno);
2344             free(attach_fifo);
2345             return;
2346         }
2347         strncpy(MPIR_attach_fifo, attach_fifo, MPIR_MAX_PATH_LENGTH - 1);
2348         free(attach_fifo);
2349         open_fifo();
2350     }
2351 }
2352 
2353 static bool mpir_breakpoint_fired = false;
2354 
2355 static void _send_notification(int status)
2356 {
2357     opal_buffer_t buf;
2358     orte_grpcomm_signature_t sig;
2359     int rc;
2360     opal_value_t kv, *kvptr;
2361 
2362     OBJ_CONSTRUCT(&buf, opal_buffer_t);
2363 
2364     
2365     if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) {
2366         ORTE_ERROR_LOG(rc);
2367         OBJ_DESTRUCT(&buf);
2368         return;
2369     }
2370 
2371     
2372     if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
2373         ORTE_ERROR_LOG(rc);
2374         OBJ_DESTRUCT(&buf);
2375         return;
2376     }
2377 
2378     
2379     status = 1;
2380     if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &status, 1, OPAL_INT))) {
2381         ORTE_ERROR_LOG(rc);
2382         OBJ_DESTRUCT(&buf);
2383         return;
2384     }
2385     OBJ_CONSTRUCT(&kv, opal_value_t);
2386     kv.key = strdup(OPAL_PMIX_EVENT_NON_DEFAULT);
2387     kv.type = OPAL_BOOL;
2388     kv.data.flag = true;
2389     kvptr = &kv;
2390     if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &kvptr, 1, OPAL_VALUE))) {
2391         ORTE_ERROR_LOG(rc);
2392         OBJ_DESTRUCT(&kv);
2393         OBJ_DESTRUCT(&buf);
2394         return;
2395     }
2396     OBJ_DESTRUCT(&kv);
2397 
2398     
2399     OBJ_CONSTRUCT(&sig, orte_grpcomm_signature_t);
2400     sig.signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
2401     sig.signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
2402     sig.signature[0].vpid = ORTE_VPID_WILDCARD;
2403     sig.sz = 1;
2404 
2405     if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(&sig, ORTE_RML_TAG_NOTIFICATION, &buf))) {
2406         ORTE_ERROR_LOG(rc);
2407     }
2408     OBJ_DESTRUCT(&sig);
2409     OBJ_DESTRUCT(&buf);
2410 }
2411 
2412 static void orte_debugger_dump(void)
2413 {
2414     int i;
2415 
2416     DUMP_INT(MPIR_being_debugged);
2417     DUMP_INT(MPIR_debug_state);
2418     DUMP_INT(MPIR_partial_attach_ok);
2419     DUMP_INT(MPIR_i_am_starter);
2420     DUMP_INT(MPIR_forward_output);
2421     DUMP_INT(MPIR_proctable_size);
2422     fprintf(stderr, "  MPIR_proctable:\n");
2423     for (i = 0; i < MPIR_proctable_size; i++) {
2424         fprintf(stderr,
2425                 "    (i, host, exe, pid) = (%d, %s, %s, %d)\n",
2426                 i,
2427                 MPIR_proctable[i].host_name,
2428                 MPIR_proctable[i].executable_name,
2429                 MPIR_proctable[i].pid);
2430     }
2431     fprintf(stderr, "MPIR_executable_path: %s\n",
2432             ('\0' == MPIR_executable_path[0]) ?
2433             "NULL" : (char*) MPIR_executable_path);
2434     fprintf(stderr, "MPIR_server_arguments: %s\n",
2435             ('\0' == MPIR_server_arguments[0]) ?
2436             "NULL" : (char*) MPIR_server_arguments);
2437 }
2438 
2439 static void setup_debugger_job(orte_jobid_t jobid)
2440 {
2441     orte_job_t *debugger;
2442     orte_app_context_t *app;
2443     int rc;
2444     char cwd[OPAL_PATH_MAX];
2445     bool flag = true;
2446 
2447     
2448     debugger = OBJ_NEW(orte_job_t);
2449     
2450 
2451 
2452     orte_plm_base_create_jobid(debugger);
2453     
2454     opal_argv_append_nosize(&debugger->personality, "orte");
2455     
2456     ORTE_FLAG_SET(debugger, ORTE_JOB_FLAG_DEBUGGER_DAEMON);
2457     
2458     if (!MPIR_forward_output) {
2459         ORTE_FLAG_SET(debugger, ORTE_JOB_FLAG_FORWARD_OUTPUT);
2460     }
2461     
2462     debugger->stdin_target = ORTE_VPID_INVALID;
2463     
2464     opal_hash_table_set_value_uint32(orte_job_data, debugger->jobid, debugger);
2465     
2466     app = OBJ_NEW(orte_app_context_t);
2467     if (NULL != orte_debugger_test_daemon) {
2468         app->app = strdup(orte_debugger_test_daemon);
2469     } else {
2470         app->app = strdup((char*)MPIR_executable_path);
2471     }
2472     
2473 
2474 
2475     if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
2476         orte_show_help("help-orterun.txt", "orterun:init-failure",
2477                        true, "get the cwd", rc);
2478         return;
2479     }
2480     app->cwd = strdup(cwd);
2481     orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL);
2482     opal_argv_append_nosize(&app->argv, app->app);
2483     build_debugger_args(app);
2484     opal_pointer_array_add(debugger->apps, app);
2485     debugger->num_apps = 1;
2486     
2487     debugger->map = OBJ_NEW(orte_job_map_t);
2488     ORTE_SET_MAPPING_POLICY(debugger->map->mapping, ORTE_MAPPING_PPR);
2489     ORTE_SET_MAPPING_DIRECTIVE(debugger->map->mapping, ORTE_MAPPING_GIVEN);
2490     ORTE_SET_MAPPING_DIRECTIVE(debugger->map->mapping, ORTE_MAPPING_DEBUGGER);
2491     
2492     debugger->map->ppr = strdup("1:node");
2493     
2494     if (ORTE_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&debugger->map->binding, "none"))) {
2495         ORTE_ERROR_LOG(rc);
2496         return;
2497     }
2498     
2499     rc = orte_plm.spawn(debugger);
2500     if (ORTE_SUCCESS != rc) {
2501         ORTE_ERROR_LOG(rc);
2502     }
2503 }
2504 
2505 
2506 
2507 
2508 
2509 
2510 
2511 
2512 
2513 
2514 void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
2515 {
2516     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
2517     orte_job_t *jdata = caddy->jdata;
2518     orte_proc_t *proc;
2519     orte_app_context_t *appctx;
2520     orte_vpid_t i, j;
2521     char **aliases, *aptr;
2522 
2523     
2524 
2525 
2526 
2527     if (MPIR_proctable || 0 == jdata->num_procs) {
2528 
2529         
2530         opal_output_verbose(5, orte_debug_output,
2531                             "%s: debugger already initialized or zero procs",
2532                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
2533 
2534         if (MPIR_being_debugged || NULL != orte_debugger_test_daemon ||
2535             NULL != getenv("ORTE_TEST_DEBUGGER_ATTACH")) {
2536             OBJ_RELEASE(caddy);
2537             
2538             if (!mpir_warning_printed) {
2539                 mpir_warning_printed = true;
2540                 
2541                 if (NULL == getenv("OMPI_MPIR_DO_NOT_WARN")) {
2542                     orte_show_help("help-orted.txt", "mpir-debugger-detected", true);
2543                 }
2544             }
2545             if (!mpir_breakpoint_fired) {
2546                 
2547                 mpir_breakpoint_fired = true;
2548 
2549                 
2550                 MPIR_Breakpoint();
2551 
2552                 opal_output_verbose(5, orte_debug_output,
2553                                     "%s NOTIFYING DEBUGGER RELEASE",
2554                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
2555                 
2556                 _send_notification(OPAL_ERR_DEBUGGER_RELEASE);
2557             }
2558         }
2559         return;
2560     }
2561 
2562     
2563 
2564     opal_output_verbose(5, orte_debug_output,
2565                         "%s: Setting up debugger process table for applications",
2566                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
2567 
2568     MPIR_debug_state = 1;
2569 
2570     
2571     MPIR_proctable_size = jdata->num_procs;
2572 
2573     
2574     MPIR_proctable = (struct MPIR_PROCDESC *)malloc(sizeof(struct MPIR_PROCDESC) *
2575                                                     MPIR_proctable_size);
2576     if (MPIR_proctable == NULL) {
2577         ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
2578         OBJ_RELEASE(caddy);
2579         return;
2580     }
2581 
2582     if (orte_debugger_dump_proctable) {
2583         opal_output(orte_clean_output, "MPIR Proctable for job %s", ORTE_JOBID_PRINT(jdata->jobid));
2584     }
2585 
2586     
2587     for (j=0; j < jdata->num_procs; j++) {
2588         if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) {
2589             continue;
2590         }
2591         
2592 
2593 
2594         i = proc->name.vpid;
2595         if (NULL == (appctx = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx))) {
2596             continue;
2597         }
2598 
2599         
2600         if (orte_retain_aliases) {
2601             aliases = NULL;
2602             aptr = NULL;
2603             if (orte_get_attribute(&proc->node->attributes, ORTE_NODE_ALIAS, (void**)&aptr, OPAL_STRING)) {
2604                 aliases = opal_argv_split(aptr, ',');
2605                 free(aptr);
2606                 if (orte_use_hostname_alias <= opal_argv_count(aliases)) {
2607                     MPIR_proctable[i].host_name = strdup(aliases[orte_use_hostname_alias-1]);
2608                 }
2609                 opal_argv_free(aliases);
2610             }
2611         } else {
2612             
2613             MPIR_proctable[i].host_name = strdup(proc->node->name);
2614         }
2615 
2616         if ( 0 == strncmp(appctx->app, OPAL_PATH_SEP, 1 )) {
2617             MPIR_proctable[i].executable_name =
2618                 opal_os_path( false, appctx->app, NULL );
2619         } else {
2620             MPIR_proctable[i].executable_name =
2621                 opal_os_path( false, appctx->cwd, appctx->app, NULL );
2622         }
2623         MPIR_proctable[i].pid = proc->pid;
2624         if (orte_debugger_dump_proctable) {
2625             opal_output(orte_clean_output, "%s: Host %s Exe %s Pid %d",
2626                         ORTE_VPID_PRINT(i), MPIR_proctable[i].host_name,
2627                         MPIR_proctable[i].executable_name, MPIR_proctable[i].pid);
2628         }
2629     }
2630 
2631     if (0 < opal_output_get_verbosity(orte_debug_output)) {
2632         orte_debugger_dump();
2633     }
2634 
2635     
2636 
2637 
2638     if (MPIR_being_debugged || NULL != orte_debugger_test_daemon ||
2639         NULL != getenv("ORTE_TEST_DEBUGGER_ATTACH")) {
2640         
2641         if (!mpir_warning_printed) {
2642             mpir_warning_printed = true;
2643             
2644             if (NULL == getenv("OMPI_MPIR_DO_NOT_WARN")) {
2645                 orte_show_help("help-orted.txt", "mpir-debugger-detected", true);
2646             }
2647         }
2648 
2649         
2650 
2651 
2652 
2653         if ('\0' == MPIR_executable_path[0] && NULL == orte_debugger_test_daemon) {
2654             
2655             mpir_breakpoint_fired = true;
2656 
2657             
2658             MPIR_Breakpoint();
2659 
2660             opal_output_verbose(2, orte_debug_output,
2661                                 "%s NOTIFYING DEBUGGER RELEASE",
2662                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
2663             
2664             _send_notification(OPAL_ERR_DEBUGGER_RELEASE);
2665         } else if (!orte_debugger_test_attach) {
2666             
2667 
2668 
2669 
2670             opal_output_verbose(2, orte_debug_output,
2671                                 "%s Cospawning debugger daemons %s",
2672                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2673                                 (NULL == orte_debugger_test_daemon) ?
2674                                 MPIR_executable_path : orte_debugger_test_daemon);
2675             setup_debugger_job(jdata->jobid);
2676         }
2677         
2678         OBJ_RELEASE(caddy);
2679         return;
2680     }
2681 
2682     
2683     OBJ_RELEASE(caddy);
2684 }
2685 
2686 
2687 
2688 
2689 
2690 
2691 static int process(char *orig_line, char *basename, opal_cmd_line_t *cmd_line,
2692                    int argc, char **argv, char ***new_argv, int num_procs)
2693 {
2694     int ret = ORTE_SUCCESS;
2695     int i, j, count;
2696     char *line = NULL, *tmp = NULL, *full_line = strdup(orig_line);
2697     char **orterun_argv = NULL, **executable_argv = NULL, **line_argv = NULL;
2698     char cwd[OPAL_PATH_MAX];
2699     bool used_num_procs = false;
2700     bool single_app = false;
2701     bool fail_needed_executable = false;
2702 
2703     line = full_line;
2704     if (NULL == line) {
2705         ret = ORTE_ERR_OUT_OF_RESOURCE;
2706         goto out;
2707     }
2708 
2709     
2710 
2711     for (i = 0; '\0' != line[i] && isspace(line[i]); ++line) {
2712         continue;
2713     }
2714     for (i = strlen(line) - 2; i > 0 && isspace(line[i]); ++i) {
2715         line[i] = '\0';
2716     }
2717     if (strlen(line) <= 0) {
2718         ret = ORTE_ERROR;
2719         goto out;
2720     }
2721 
2722     
2723 
2724 
2725     opal_cmd_line_get_tail(cmd_line, &i, &executable_argv);
2726 
2727     
2728 
2729 
2730 
2731     orterun_argv = opal_argv_copy(argv);
2732     count = opal_argv_count(orterun_argv);
2733     opal_argv_delete(&count, &orterun_argv, 0, 1);
2734     for (i = 0; NULL != orterun_argv[i]; ++i) {
2735         count = opal_argv_count(orterun_argv);
2736         if (0 == strcmp(orterun_argv[i], "-debug") ||
2737             0 == strcmp(orterun_argv[i], "--debug")) {
2738             opal_argv_delete(&count, &orterun_argv, i, 1);
2739         } else if (0 == strcmp(orterun_argv[i], "-tv") ||
2740                    0 == strcmp(orterun_argv[i], "--tv")) {
2741             opal_argv_delete(&count, &orterun_argv, i, 1);
2742         } else if (0 == strcmp(orterun_argv[i], "--debugger") ||
2743                    0 == strcmp(orterun_argv[i], "-debugger")) {
2744             opal_argv_delete(&count, &orterun_argv, i, 2);
2745         }
2746     }
2747 
2748     
2749 
2750 
2751     *new_argv = NULL;
2752     line_argv = opal_argv_split(line, ' ');
2753     if (NULL == line_argv) {
2754         ret = ORTE_ERR_NOT_FOUND;
2755         goto out;
2756     }
2757     for (i = 0; NULL != line_argv[i]; ++i) {
2758         if (0 == strcmp(line_argv[i], "@mpirun@") ||
2759             0 == strcmp(line_argv[i], "@orterun@")) {
2760             opal_argv_append_nosize(new_argv, argv[0]);
2761         } else if (0 == strcmp(line_argv[i], "@mpirun_args@") ||
2762                    0 == strcmp(line_argv[i], "@orterun_args@")) {
2763             for (j = 0; NULL != orterun_argv && NULL != orterun_argv[j]; ++j) {
2764                 opal_argv_append_nosize(new_argv, orterun_argv[j]);
2765             }
2766         } else if (0 == strcmp(line_argv[i], "@np@")) {
2767             used_num_procs = true;
2768             opal_asprintf(&tmp, "%d", num_procs);
2769             opal_argv_append_nosize(new_argv, tmp);
2770             free(tmp);
2771         } else if (0 == strcmp(line_argv[i], "@single_app@")) {
2772             
2773 
2774             single_app = true;
2775         } else if (0 == strcmp(line_argv[i], "@executable@")) {
2776             
2777 
2778             if (NULL != executable_argv) {
2779                 opal_argv_append_nosize(new_argv, executable_argv[0]);
2780             } else {
2781                 fail_needed_executable = true;
2782             }
2783         } else if (0 == strcmp(line_argv[i], "@executable_argv@")) {
2784             
2785 
2786             if (NULL != executable_argv) {
2787                 for (j = 1; NULL != executable_argv[j]; ++j) {
2788                     opal_argv_append_nosize(new_argv, executable_argv[j]);
2789                 }
2790             } else {
2791                 fail_needed_executable = true;
2792             }
2793         } else {
2794             
2795             opal_argv_append_nosize(new_argv, line_argv[i]);
2796         }
2797     }
2798 
2799     
2800 
2801     getcwd(cwd, OPAL_PATH_MAX);
2802     tmp = opal_path_findv((*new_argv)[0], X_OK, environ, cwd);
2803     if (NULL != tmp) {
2804         free(tmp);
2805 
2806         
2807 
2808         tmp = opal_argv_join(argv, ' ');
2809 
2810         
2811 
2812 
2813         if (used_num_procs && 0 == num_procs) {
2814             free(tmp);
2815             tmp = opal_argv_join(orterun_argv, ' ');
2816             orte_show_help("help-orterun.txt", "debugger requires -np",
2817                            true, (*new_argv)[0], argv[0], tmp,
2818                            (*new_argv)[0]);
2819             
2820         }
2821 
2822         
2823         else if (single_app && NULL != strstr(tmp, " : ")) {
2824             orte_show_help("help-orterun.txt",
2825                            "debugger only accepts single app", true,
2826                            (*new_argv)[0], (*new_argv)[0]);
2827             
2828         }
2829 
2830         
2831 
2832 
2833         else if (fail_needed_executable) {
2834             orte_show_help("help-orterun.txt",
2835                            "debugger requires executable", true,
2836                            (*new_argv)[0], argv[0], (*new_argv)[0], argv[0],
2837                            (*new_argv)[0]);
2838             
2839         }
2840 
2841         
2842         else {
2843             goto out;
2844         }
2845     }
2846 
2847     
2848 
2849     opal_argv_free(*new_argv);
2850     *new_argv = NULL;
2851     ret = ORTE_ERR_NOT_FOUND;
2852 
2853  out:
2854     if (NULL != orterun_argv) {
2855         opal_argv_free(orterun_argv);
2856     }
2857     if (NULL != executable_argv) {
2858         opal_argv_free(executable_argv);
2859     }
2860     if (NULL != line_argv) {
2861         opal_argv_free(line_argv);
2862     }
2863     if (NULL != tmp) {
2864         free(tmp);
2865     }
2866     if (NULL != full_line) {
2867         free(full_line);
2868     }
2869     return ret;
2870 }
2871 
2872 static void open_fifo(void)
2873 {
2874     if (orte_debugger_attach_fd > 0) {
2875         close(orte_debugger_attach_fd);
2876     }
2877 
2878     orte_debugger_attach_fd = open(MPIR_attach_fifo, O_RDONLY | O_NONBLOCK, 0);
2879     if (orte_debugger_attach_fd < 0) {
2880         opal_output(0, "%s unable to open debugger attach fifo",
2881                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
2882         return;
2883     }
2884 
2885     
2886     if (opal_fd_set_cloexec(orte_debugger_attach_fd) != OPAL_SUCCESS) {
2887         opal_output(0, "%s unable to set debugger attach fifo to CLOEXEC",
2888                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
2889         close(orte_debugger_attach_fd);
2890         orte_debugger_attach_fd = -1;
2891         return;
2892     }
2893 
2894     if (orte_debugger_test_attach) {
2895         opal_output(0, "%s Monitoring debugger attach fifo %s",
2896                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2897                     MPIR_attach_fifo);
2898     } else {
2899         opal_output_verbose(2, orte_debug_output,
2900                             "%s Monitoring debugger attach fifo %s",
2901                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2902                             MPIR_attach_fifo);
2903     }
2904     orte_debugger_attach = (opal_event_t*)malloc(sizeof(opal_event_t));
2905     opal_event_set(orte_event_base, orte_debugger_attach, orte_debugger_attach_fd,
2906                    OPAL_EV_READ, attach_debugger, orte_debugger_attach);
2907 
2908     orte_debugger_fifo_active = true;
2909     opal_event_add(orte_debugger_attach, 0);
2910 }
2911 
2912 static bool did_once = false;
2913 
2914 static void attach_debugger(int fd, short event, void *arg)
2915 {
2916     unsigned char fifo_cmd;
2917     int rc;
2918     orte_timer_t *tm;
2919 
2920     if (orte_debugger_fifo_active) {
2921         orte_debugger_attach = (opal_event_t*)arg;
2922         orte_debugger_fifo_active = false;
2923 
2924         rc = read(orte_debugger_attach_fd, &fifo_cmd, sizeof(fifo_cmd));
2925         if (!rc) {
2926             
2927             opal_event_free(orte_debugger_attach);
2928             
2929             open_fifo();
2930             return;
2931         }
2932         if (1 != fifo_cmd) {
2933             
2934             orte_debugger_fifo_active = true;
2935             opal_event_add(orte_debugger_attach, 0);
2936             return;
2937         }
2938     }
2939 
2940     if (!MPIR_being_debugged && !orte_debugger_test_attach) {
2941         
2942         if (0 == orte_debugger_check_rate) {
2943             orte_debugger_fifo_active = true;
2944             opal_event_add(orte_debugger_attach, 0);
2945         } else if (!MPIR_being_debugged) {
2946             tm = (orte_timer_t*)arg;
2947             
2948             opal_event_evtimer_add(tm->ev, &tm->tv);
2949         }
2950         return;
2951     }
2952 
2953     opal_output_verbose(1, orte_debug_output,
2954                         "%s Attaching debugger %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2955                         (NULL == orte_debugger_test_daemon) ? MPIR_executable_path : orte_debugger_test_daemon);
2956 
2957     
2958     if (!mpir_warning_printed) {
2959         mpir_warning_printed = true;
2960         
2961         if (NULL == getenv("OMPI_MPIR_DO_NOT_WARN")) {
2962             orte_show_help("help-orted.txt", "mpir-debugger-detected", true);
2963         }
2964     }
2965 
2966     
2967 
2968 
2969 
2970     if ('\0' != MPIR_executable_path[0] || NULL != orte_debugger_test_daemon) {
2971         opal_output_verbose(2, orte_debug_output,
2972                             "%s Spawning debugger daemons %s",
2973                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
2974                             (NULL == orte_debugger_test_daemon) ?
2975                             MPIR_executable_path : orte_debugger_test_daemon);
2976         setup_debugger_job(ORTE_JOBID_WILDCARD);
2977         did_once = true;
2978     }
2979 
2980     
2981     if (NULL != orte_debugger_test_daemon && did_once) {
2982         return;
2983     }
2984 
2985     
2986     if (0 == orte_debugger_check_rate) {
2987         orte_debugger_fifo_active = true;
2988         opal_event_add(orte_debugger_attach, 0);
2989     } else if (!MPIR_being_debugged) {
2990         tm = (orte_timer_t*)arg;
2991         
2992         opal_event_evtimer_add(tm->ev, &tm->tv);
2993     }
2994 }
2995 
2996 static void build_debugger_args(orte_app_context_t *debugger)
2997 {
2998     int i, j;
2999     char mpir_arg[MPIR_MAX_ARG_LENGTH];
3000 
3001     if ('\0' != MPIR_server_arguments[0]) {
3002         j=0;
3003         memset(mpir_arg, 0, MPIR_MAX_ARG_LENGTH);
3004         for (i=0; i < MPIR_MAX_ARG_LENGTH; i++) {
3005             if (MPIR_server_arguments[i] == '\0') {
3006                 if (0 < j) {
3007                     opal_argv_append_nosize(&debugger->argv, mpir_arg);
3008                     memset(mpir_arg, 0, MPIR_MAX_ARG_LENGTH);
3009                     j=0;
3010                 }
3011             } else {
3012                 mpir_arg[j] = MPIR_server_arguments[i];
3013                 j++;
3014             }
3015         }
3016     }
3017 }
3018 
3019 
3020 
3021 
3022 static void run_debugger(char *basename, opal_cmd_line_t *cmd_line,
3023                          int argc, char *argv[], int num_procs)
3024 {
3025     int i, id, ret;
3026     char **new_argv = NULL;
3027     const char **tmp = NULL;
3028     char *value, **lines, *env_name;
3029 
3030     
3031 
3032 
3033     id = mca_base_var_find("orte", "orte", NULL, "base_user_debugger");
3034     if (id < 0) {
3035         orte_show_help("help-orterun.txt", "debugger-mca-param-not-found",
3036                        true);
3037         exit(1);
3038     }
3039 
3040     ret = mca_base_var_get_value (id, &tmp, NULL, NULL);
3041     if (OPAL_SUCCESS != ret || NULL == tmp || NULL == tmp[0]) {
3042         orte_show_help("help-orterun.txt", "debugger-orte_base_user_debugger-empty",
3043                        true);
3044         exit(1);
3045     }
3046 
3047     
3048 
3049     lines = opal_argv_split(tmp[0], ':');
3050     for (i = 0; NULL != lines[i]; ++i) {
3051         if (ORTE_SUCCESS == process(lines[i], basename, cmd_line, argc, argv,
3052                                     &new_argv, num_procs)) {
3053             break;
3054         }
3055     }
3056 
3057     
3058 
3059     if (NULL == lines[i]) {
3060         orte_show_help("help-orterun.txt", "debugger-not-found", true);
3061         exit(1);
3062     }
3063     opal_argv_free(lines);
3064 
3065     
3066 
3067     
3068     memset((char*)MPIR_executable_path, 0, MPIR_MAX_PATH_LENGTH);
3069     memset((char*)MPIR_server_arguments, 0, MPIR_MAX_ARG_LENGTH);
3070 
3071     
3072 
3073 
3074 
3075     ret = mca_base_var_env_name ("orte_in_parallel_debugger", &env_name);
3076     if (OPAL_SUCCESS == ret && NULL != env_name) {
3077         opal_setenv(env_name, "1", true, &environ);
3078         free(env_name);
3079     }
3080 
3081     
3082     if (!mpir_warning_printed) {
3083         mpir_warning_printed = true;
3084         
3085         if (NULL == getenv("OMPI_MPIR_DO_NOT_WARN")) {
3086             orte_show_help("help-orted.txt", "mpir-debugger-detected", true);
3087         }
3088     }
3089 
3090     
3091     execvp(new_argv[0], new_argv);
3092     value = opal_argv_join(new_argv, ' ');
3093     orte_show_help("help-orterun.txt", "debugger-exec-failed",
3094                    true, basename, value, new_argv[0]);
3095     free(value);
3096     opal_argv_free(new_argv);
3097     exit(1);
3098 }
3099 
3100 void orte_debugger_detached(int fd, short event, void *cbdata)
3101 {
3102     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
3103     OBJ_RELEASE(caddy);
3104 
3105     
3106     mpir_breakpoint_fired = false;
3107 }
3108 
3109 static uint32_t ntraces = 0;
3110 static orte_timer_t stack_trace_timer;
3111 
3112 static void stack_trace_recv(int status, orte_process_name_t* sender,
3113                              opal_buffer_t *buffer, orte_rml_tag_t tag,
3114                              void* cbdata)
3115 {
3116     opal_buffer_t *blob;
3117     char *st;
3118     int32_t cnt;
3119     orte_process_name_t name;
3120     char *hostname;
3121     pid_t pid;
3122 
3123     
3124     cnt = 1;
3125     while (OPAL_SUCCESS == opal_dss.unpack(buffer, &blob, &cnt, OPAL_BUFFER)) {
3126         
3127         cnt = 1;
3128         if (OPAL_SUCCESS != opal_dss.unpack(blob, &name, &cnt, ORTE_NAME) ||
3129             OPAL_SUCCESS != opal_dss.unpack(blob, &hostname, &cnt, OPAL_STRING) ||
3130             OPAL_SUCCESS != opal_dss.unpack(blob, &pid, &cnt, OPAL_PID)) {
3131             OBJ_RELEASE(blob);
3132             continue;
3133         }
3134         fprintf(stderr, "STACK TRACE FOR PROC %s (%s, PID %lu)\n", ORTE_NAME_PRINT(&name), hostname, (unsigned long) pid);
3135         free(hostname);
3136         
3137         cnt = 1;
3138         while (OPAL_SUCCESS == opal_dss.unpack(blob, &st, &cnt, OPAL_STRING)) {
3139             fprintf(stderr, "\t%s", st);  
3140             free(st);
3141             cnt = 1;
3142         }
3143         fprintf(stderr, "\n");
3144         OBJ_RELEASE(blob);
3145         cnt = 1;
3146     }
3147     ++ntraces;
3148     if (orte_process_info.num_procs == ntraces) {
3149         if( orte_stack_trace_wait_timeout > 0 ) {
3150             
3151             OBJ_DESTRUCT(&stack_trace_timer);
3152         }
3153         
3154         ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
3155         
3156         orte_abnormal_term_ordered = true;
3157     }
3158 }
3159 
3160 static void stack_trace_timeout(int sd, short args, void *cbdata)
3161 {
3162     
3163     ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
3164     
3165     orte_abnormal_term_ordered = true;
3166 }
3167 
3168 void orte_timeout_wakeup(int sd, short args, void *cbdata)
3169 {
3170     orte_job_t *jdata;
3171     orte_proc_t *proc;
3172     int i;
3173     int rc;
3174     uint32_t key;
3175     void *nptr;
3176 
3177     
3178 
3179 
3180     orte_show_help("help-orterun.txt", "orterun:timeout",
3181                    true, timeout_seconds);
3182     ORTE_UPDATE_EXIT_STATUS(ETIMEDOUT);
3183     
3184     if (ORTE_PROC_IS_HNP &&
3185         NULL != getenv("ORTE_TEST_HNP_SUICIDE")) {
3186         opal_output(0, "HNP exiting w/o cleanup");
3187         exit(1);
3188     }
3189     if (orte_cmd_options.report_state_on_timeout) {
3190         
3191         rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jdata, &nptr);
3192         while (OPAL_SUCCESS == rc) {
3193             
3194             fprintf(stderr, "DATA FOR JOB: %s\n", ORTE_JOBID_PRINT(jdata->jobid));
3195             fprintf(stderr, "\tNum apps: %d\tNum procs: %d\tJobState: %s\tAbort: %s\n",
3196                     (int)jdata->num_apps, (int)jdata->num_procs,
3197                     orte_job_state_to_str(jdata->state),
3198                     (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) ? "True" : "False");
3199             fprintf(stderr, "\tNum launched: %ld\tNum reported: %ld\tNum terminated: %ld\n",
3200                     (long)jdata->num_launched, (long)jdata->num_reported, (long)jdata->num_terminated);
3201             fprintf(stderr, "\n\tProcs:\n");
3202             for (i=0; i < jdata->procs->size; i++) {
3203                 if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
3204                     fprintf(stderr, "\t\tRank: %s\tNode: %s\tPID: %u\tState: %s\tExitCode %d\n",
3205                             ORTE_VPID_PRINT(proc->name.vpid),
3206                             (NULL == proc->node) ? "UNKNOWN" : proc->node->name,
3207                             (unsigned int)proc->pid,
3208                             orte_proc_state_to_str(proc->state), proc->exit_code);
3209                 }
3210             }
3211             fprintf(stderr, "\n");
3212             rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jdata, nptr, &nptr);
3213         }
3214     }
3215     
3216 
3217     if (orte_cmd_options.get_stack_traces) {
3218         orte_daemon_cmd_flag_t command = ORTE_DAEMON_GET_STACK_TRACES;
3219         opal_buffer_t *buffer;
3220         orte_grpcomm_signature_t *sig;
3221 
3222         fprintf(stderr, "Waiting for stack traces (this may take a few moments)...\n");
3223 
3224         
3225         orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_STACK_TRACE,
3226                                 ORTE_RML_PERSISTENT, stack_trace_recv, NULL);
3227 
3228         
3229         buffer = OBJ_NEW(opal_buffer_t);
3230         
3231         if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &command, 1, ORTE_DAEMON_CMD))) {
3232             ORTE_ERROR_LOG(rc);
3233             OBJ_RELEASE(buffer);
3234             goto giveup;
3235         }
3236         
3237         sig = OBJ_NEW(orte_grpcomm_signature_t);
3238         sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
3239         sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
3240         sig->signature[0].vpid = ORTE_VPID_WILDCARD;
3241         sig->sz = 1;
3242         if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, buffer))) {
3243             ORTE_ERROR_LOG(rc);
3244             OBJ_RELEASE(buffer);
3245             OBJ_RELEASE(sig);
3246             goto giveup;
3247         }
3248         OBJ_RELEASE(buffer);
3249         
3250         OBJ_RELEASE(sig);
3251         
3252 
3253         if( orte_stack_trace_wait_timeout > 0 ) {
3254             OBJ_CONSTRUCT(&stack_trace_timer, orte_timer_t);
3255             opal_event_evtimer_set(orte_event_base,
3256                                    stack_trace_timer.ev, stack_trace_timeout, NULL);
3257             opal_event_set_priority(stack_trace_timer.ev, ORTE_ERROR_PRI);
3258             stack_trace_timer.tv.tv_sec = orte_stack_trace_wait_timeout;
3259             opal_event_evtimer_add(stack_trace_timer.ev, &stack_trace_timer.tv);
3260         }
3261         return;
3262     }
3263   giveup:
3264     
3265     ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
3266     
3267     orte_abnormal_term_ordered = true;
3268 }
3269 
3270 static int nreports = 0;
3271 static orte_timer_t profile_timer;
3272 static int nchecks = 0;
3273 
3274 static void profile_timeout(int sd, short args, void *cbdata)
3275 {
3276     
3277     ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
3278     
3279     orte_abnormal_term_ordered = true;
3280 }
3281 
3282 
3283 static void profile_recv(int status, orte_process_name_t* sender,
3284                          opal_buffer_t *buffer, orte_rml_tag_t tag,
3285                          void* cbdata)
3286 {
3287     int32_t cnt;
3288     char *hostname;
3289     float dpss, pss;
3290 
3291     
3292     cnt = 1;
3293     if (OPAL_SUCCESS != opal_dss.unpack(buffer, &hostname, &cnt, OPAL_STRING)) {
3294         goto done;
3295     }
3296     
3297     fprintf(stderr, "Memory profile from host: %s\n", hostname);
3298     free(hostname);
3299 
3300     
3301     cnt = 1;
3302     if (OPAL_SUCCESS != opal_dss.unpack(buffer, &dpss, &cnt, OPAL_FLOAT)) {
3303         goto done;
3304     }
3305     
3306     cnt = 1;
3307     if (OPAL_SUCCESS != opal_dss.unpack(buffer, &pss, &cnt, OPAL_FLOAT)) {
3308         goto done;
3309     }
3310 
3311     fprintf(stderr, "\tDaemon: %8.2fM\tProcs: %8.2fM\n", dpss, pss);
3312 
3313   done:
3314     --nreports;
3315     if (nreports == 0) {
3316         ++nchecks;
3317         
3318         OBJ_DESTRUCT(&profile_timer);
3319         
3320         _send_notification(12345);
3321         
3322 
3323         if (2 > nchecks) {
3324             
3325             opal_event_evtimer_set(orte_event_base, orte_memprofile_timeout->ev,
3326                                    orte_profile_wakeup, NULL);
3327             opal_event_set_priority(orte_memprofile_timeout->ev, ORTE_ERROR_PRI);
3328             opal_event_evtimer_add(orte_memprofile_timeout->ev, &orte_memprofile_timeout->tv);
3329             
3330             OBJ_CONSTRUCT(&profile_timer, orte_timer_t);
3331             opal_event_evtimer_set(orte_event_base,
3332                                    profile_timer.ev, profile_timeout, NULL);
3333             opal_event_set_priority(profile_timer.ev, ORTE_ERROR_PRI);
3334             profile_timer.tv.tv_sec = 30;
3335             opal_event_evtimer_add(profile_timer.ev, &profile_timer.tv);
3336             return;
3337         }
3338     }
3339 }
3340 
3341 void orte_profile_wakeup(int sd, short args, void *cbdata)
3342 {
3343     orte_job_t *dmns;
3344     orte_proc_t *dmn;
3345     int i;
3346     int rc;
3347     orte_daemon_cmd_flag_t command = ORTE_DAEMON_GET_MEMPROFILE;
3348     opal_buffer_t *buffer;
3349     orte_process_name_t name;
3350 
3351     
3352 
3353 
3354 
3355 
3356     
3357     nreports = 1;  
3358 
3359     
3360     buffer = OBJ_NEW(opal_buffer_t);
3361     
3362     if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &command, 1, ORTE_DAEMON_CMD))) {
3363         ORTE_ERROR_LOG(rc);
3364         OBJ_RELEASE(buffer);
3365         goto giveup;
3366     }
3367     
3368     dmns = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
3369     if (NULL != (dmn = (orte_proc_t*)opal_pointer_array_get_item(dmns->procs, 1))) {
3370         ++nreports;
3371     }
3372 
3373     
3374     name.jobid = ORTE_PROC_MY_NAME->jobid;
3375     for (i=0; i < nreports; i++) {
3376         OBJ_RETAIN(buffer);
3377         name.vpid = i;
3378         if (0 > (rc = orte_rml.send_buffer_nb(&name, buffer,
3379                                               ORTE_RML_TAG_DAEMON,
3380                                               orte_rml_send_callback, NULL))) {
3381             ORTE_ERROR_LOG(rc);
3382             OBJ_RELEASE(buffer);
3383         }
3384     }
3385     OBJ_RELEASE(buffer); 
3386 
3387     
3388 
3389     OBJ_CONSTRUCT(&profile_timer, orte_timer_t);
3390     opal_event_evtimer_set(orte_event_base,
3391                            profile_timer.ev, profile_timeout, NULL);
3392     opal_event_set_priority(profile_timer.ev, ORTE_ERROR_PRI);
3393     profile_timer.tv.tv_sec = 30;
3394     opal_event_evtimer_add(profile_timer.ev, &profile_timer.tv);
3395     return;
3396 
3397   giveup:
3398     
3399     ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALL_JOBS_COMPLETE);
3400 }