root/orte/runtime/orte_globals.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. orte_dt_init
  2. orte_get_job_data_object
  3. orte_get_proc_object
  4. orte_get_proc_daemon_vpid
  5. orte_get_proc_hostname
  6. orte_get_proc_node_rank
  7. orte_get_lowest_vpid_alive
  8. orte_app_context_construct
  9. orte_app_context_destructor
  10. orte_job_construct
  11. orte_job_destruct
  12. orte_node_construct
  13. orte_node_destruct
  14. orte_proc_construct
  15. orte_proc_destruct
  16. orte_job_map_construct
  17. orte_job_map_destruct
  18. orte_attr_cons
  19. orte_attr_des
  20. tcon
  21. tdes

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2007-2017 Cisco Systems, Inc.  All rights reserved
  13  * Copyright (c) 2009-2010 Oracle and/or its affiliates.  All rights reserved.
  14  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
  15  *                         All rights reserved.
  16  * Copyright (c) 2013-2019 Intel, Inc.  All rights reserved.
  17  * Copyright (c) 2014-2018 Research Organization for Information Science
  18  *                         and Technology (RIST).  All rights reserved.
  19  * Copyright (c) 2017      IBM Corporation.  All rights reserved.
  20  * $COPYRIGHT$
  21  *
  22  * Additional copyrights may follow
  23  *
  24  * $HEADER$
  25  */
  26 
  27 #include "orte_config.h"
  28 #include "orte/constants.h"
  29 #include "orte/types.h"
  30 
  31 #ifdef HAVE_SYS_TIME_H
  32 #include <sys/time.h>
  33 #endif
  34 
  35 #include "opal/mca/hwloc/hwloc-internal.h"
  36 #include "opal/mca/pmix/pmix.h"
  37 #include "opal/util/argv.h"
  38 #include "opal/util/output.h"
  39 #include "opal/class/opal_hash_table.h"
  40 #include "opal/class/opal_pointer_array.h"
  41 #include "opal/class/opal_value_array.h"
  42 #include "opal/dss/dss.h"
  43 #include "opal/threads/threads.h"
  44 
  45 #include "orte/mca/errmgr/errmgr.h"
  46 #include "orte/mca/rml/rml.h"
  47 #include "orte/util/proc_info.h"
  48 #include "orte/util/name_fns.h"
  49 
  50 #include "orte/runtime/runtime.h"
  51 #include "orte/runtime/runtime_internals.h"
  52 #include "orte/runtime/orte_globals.h"
  53 
  54 /* need the data type support functions here */
  55 #include "orte/runtime/data_type_support/orte_dt_support.h"
  56 
  57 /* State Machine */
  58 opal_list_t orte_job_states = {{0}};
  59 opal_list_t orte_proc_states = {{0}};
  60 
  61 /* a clean output channel without prefix */
  62 int orte_clean_output = -1;
  63 
  64 /* globals used by RTE */
  65 bool orte_debug_daemons_file_flag = false;
  66 bool orte_leave_session_attached = false;
  67 bool orte_do_not_launch = false;
  68 bool orted_spin_flag = false;
  69 char *orte_local_cpu_type = NULL;
  70 char *orte_local_cpu_model = NULL;
  71 char *orte_basename = NULL;
  72 bool orte_coprocessors_detected = false;
  73 opal_hash_table_t *orte_coprocessors = NULL;
  74 char *orte_topo_signature = NULL;
  75 bool orte_no_vm = false;
  76 char *orte_data_server_uri = NULL;
  77 
  78 /* ORTE OOB port flags */
  79 bool orte_static_ports = false;
  80 bool orte_standalone_operation = false;
  81 
  82 bool orte_keep_fqdn_hostnames = false;
  83 bool orte_have_fqdn_allocation = false;
  84 bool orte_show_resolved_nodenames = false;
  85 bool orte_retain_aliases = false;
  86 int orte_use_hostname_alias = -1;
  87 int orte_hostname_cutoff = 1000;
  88 
  89 int orted_debug_failure = -1;
  90 int orted_debug_failure_delay = -1;
  91 bool orte_never_launched = false;
  92 bool orte_devel_level_output = false;
  93 bool orte_display_topo_with_map = false;
  94 bool orte_display_diffable_output = false;
  95 
  96 char **orte_launch_environ = NULL;
  97 
  98 bool orte_hnp_is_allocated = false;
  99 bool orte_allocation_required = false;
 100 bool orte_managed_allocation = false;
 101 char *orte_set_slots = NULL;
 102 bool orte_display_allocation = false;
 103 bool orte_display_devel_allocation = false;
 104 bool orte_soft_locations = false;
 105 int orted_pmi_version = 0;
 106 bool orte_nidmap_communicated = false;
 107 bool orte_node_info_communicated = false;
 108 
 109 /* launch agents */
 110 char *orte_launch_agent = NULL;
 111 char **orted_cmd_line=NULL;
 112 char **orte_fork_agent=NULL;
 113 
 114 /* debugger job */
 115 bool orte_debugger_dump_proctable = false;
 116 char *orte_debugger_test_daemon = NULL;
 117 bool orte_debugger_test_attach = false;
 118 int orte_debugger_check_rate = -1;
 119 
 120 /* exit flags */
 121 int orte_exit_status = 0;
 122 bool orte_abnormal_term_ordered = false;
 123 bool orte_routing_is_enabled = true;
 124 bool orte_job_term_ordered = false;
 125 bool orte_orteds_term_ordered = false;
 126 bool orte_allowed_exit_without_sync = false;
 127 
 128 int orte_startup_timeout = -1;
 129 int orte_timeout_usec_per_proc = -1;
 130 float orte_max_timeout = -1.0;
 131 orte_timer_t *orte_mpiexec_timeout = NULL;
 132 
 133 int orte_stack_trace_wait_timeout = 30;
 134 
 135 /* global arrays for data storage */
 136 opal_hash_table_t *orte_job_data = NULL;
 137 opal_pointer_array_t *orte_node_pool = NULL;
 138 opal_pointer_array_t *orte_node_topologies = NULL;
 139 opal_pointer_array_t *orte_local_children = NULL;
 140 orte_vpid_t orte_total_procs = 0;
 141 
 142 /* IOF controls */
 143 bool orte_tag_output = false;
 144 bool orte_timestamp_output = false;
 145 /* generate new xterm windows to display output from specified ranks */
 146 char *orte_xterm = NULL;
 147 
 148 /* report launch progress */
 149 bool orte_report_launch_progress = false;
 150 
 151 /* allocation specification */
 152 char *orte_default_hostfile = NULL;
 153 bool orte_default_hostfile_given = false;
 154 char *orte_rankfile = NULL;
 155 int orte_num_allocated_nodes = 0;
 156 char *orte_default_dash_host = NULL;
 157 
 158 /* tool communication controls */
 159 bool orte_report_events = false;
 160 char *orte_report_events_uri = NULL;
 161 
 162 /* report bindings */
 163 bool orte_report_bindings = false;
 164 
 165 /* barrier control */
 166 bool orte_do_not_barrier = false;
 167 
 168 /* process recovery */
 169 bool orte_enable_recovery = false;
 170 int32_t orte_max_restarts = 0;
 171 
 172 /* exit status reporting */
 173 bool orte_report_child_jobs_separately = false;
 174 struct timeval orte_child_time_to_exit = {0};
 175 bool orte_abort_non_zero_exit = false;
 176 
 177 /* length of stat history to keep */
 178 int orte_stat_history_size = -1;
 179 
 180 /* envars to forward */
 181 char **orte_forwarded_envars = NULL;
 182 
 183 /* map stddiag output to stderr so it isn't forwarded to mpirun */
 184 bool orte_map_stddiag_to_stderr = false;
 185 bool orte_map_stddiag_to_stdout = false;
 186 
 187 /* maximum size of virtual machine - used to subdivide allocation */
 188 int orte_max_vm_size = -1;
 189 
 190 /* user debugger */
 191 char *orte_base_user_debugger = NULL;
 192 
 193 int orte_debug_output = -1;
 194 bool orte_debug_daemons_flag = false;
 195 bool orte_xml_output = false;
 196 FILE *orte_xml_fp = NULL;
 197 char *orte_job_ident = NULL;
 198 bool orte_execute_quiet = false;
 199 bool orte_report_silent_errors = false;
 200 
 201 /* See comment in orte/tools/orterun/debuggers.c about this MCA
 202    param */
 203 bool orte_in_parallel_debugger = false;
 204 
 205 char *orte_daemon_cores = NULL;
 206 
 207 int orte_dt_init(void)
 208 {
 209     int rc;
 210     opal_data_type_t tmp;
 211 
 212     /* set default output */
 213     orte_debug_output = opal_output_open(NULL);
 214 
 215     /* open up the verbose output for ORTE debugging */
 216     if (orte_debug_flag || 0 < orte_debug_verbosity ||
 217         (orte_debug_daemons_flag && (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP))) {
 218         if (0 < orte_debug_verbosity) {
 219             opal_output_set_verbosity(orte_debug_output, orte_debug_verbosity);
 220         } else {
 221             opal_output_set_verbosity(orte_debug_output, 1);
 222         }
 223     }
 224 
 225     /** register the base system types with the DSS */
 226     tmp = ORTE_STD_CNTR;
 227     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_std_cntr,
 228                                                      orte_dt_unpack_std_cntr,
 229                                                      (opal_dss_copy_fn_t)orte_dt_copy_std_cntr,
 230                                                      (opal_dss_compare_fn_t)orte_dt_compare_std_cntr,
 231                                                      (opal_dss_print_fn_t)orte_dt_std_print,
 232                                                      OPAL_DSS_UNSTRUCTURED,
 233                                                      "ORTE_STD_CNTR", &tmp))) {
 234         ORTE_ERROR_LOG(rc);
 235         return rc;
 236     }
 237 
 238     tmp = ORTE_JOB;
 239     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_job,
 240                                                      orte_dt_unpack_job,
 241                                                      (opal_dss_copy_fn_t)orte_dt_copy_job,
 242                                                      (opal_dss_compare_fn_t)orte_dt_compare_job,
 243                                                      (opal_dss_print_fn_t)orte_dt_print_job,
 244                                                      OPAL_DSS_STRUCTURED,
 245                                                      "ORTE_JOB", &tmp))) {
 246         ORTE_ERROR_LOG(rc);
 247         return rc;
 248     }
 249 
 250     tmp = ORTE_NODE;
 251     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_node,
 252                                                      orte_dt_unpack_node,
 253                                                      (opal_dss_copy_fn_t)orte_dt_copy_node,
 254                                                      (opal_dss_compare_fn_t)orte_dt_compare_node,
 255                                                      (opal_dss_print_fn_t)orte_dt_print_node,
 256                                                      OPAL_DSS_STRUCTURED,
 257                                                      "ORTE_NODE", &tmp))) {
 258         ORTE_ERROR_LOG(rc);
 259         return rc;
 260     }
 261 
 262     tmp = ORTE_PROC;
 263     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_proc,
 264                                                      orte_dt_unpack_proc,
 265                                                      (opal_dss_copy_fn_t)orte_dt_copy_proc,
 266                                                      (opal_dss_compare_fn_t)orte_dt_compare_proc,
 267                                                      (opal_dss_print_fn_t)orte_dt_print_proc,
 268                                                      OPAL_DSS_STRUCTURED,
 269                                                      "ORTE_PROC", &tmp))) {
 270         ORTE_ERROR_LOG(rc);
 271         return rc;
 272     }
 273 
 274     tmp = ORTE_APP_CONTEXT;
 275     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_app_context,
 276                                                      orte_dt_unpack_app_context,
 277                                                      (opal_dss_copy_fn_t)orte_dt_copy_app_context,
 278                                                      (opal_dss_compare_fn_t)orte_dt_compare_app_context,
 279                                                      (opal_dss_print_fn_t)orte_dt_print_app_context,
 280                                                      OPAL_DSS_STRUCTURED,
 281                                                      "ORTE_APP_CONTEXT", &tmp))) {
 282         ORTE_ERROR_LOG(rc);
 283         return rc;
 284     }
 285 
 286     tmp = ORTE_NODE_STATE;
 287     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_node_state,
 288                                                      orte_dt_unpack_node_state,
 289                                                      (opal_dss_copy_fn_t)orte_dt_copy_node_state,
 290                                                      (opal_dss_compare_fn_t)orte_dt_compare_node_state,
 291                                                      (opal_dss_print_fn_t)orte_dt_std_print,
 292                                                      OPAL_DSS_UNSTRUCTURED,
 293                                                      "ORTE_NODE_STATE", &tmp))) {
 294         ORTE_ERROR_LOG(rc);
 295         return rc;
 296     }
 297 
 298     tmp = ORTE_PROC_STATE;
 299     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_proc_state,
 300                                                      orte_dt_unpack_proc_state,
 301                                                      (opal_dss_copy_fn_t)orte_dt_copy_proc_state,
 302                                                      (opal_dss_compare_fn_t)orte_dt_compare_proc_state,
 303                                                      (opal_dss_print_fn_t)orte_dt_std_print,
 304                                                      OPAL_DSS_UNSTRUCTURED,
 305                                                      "ORTE_PROC_STATE", &tmp))) {
 306         ORTE_ERROR_LOG(rc);
 307         return rc;
 308     }
 309 
 310     tmp = ORTE_JOB_STATE;
 311     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_job_state,
 312                                                      orte_dt_unpack_job_state,
 313                                                      (opal_dss_copy_fn_t)orte_dt_copy_job_state,
 314                                                      (opal_dss_compare_fn_t)orte_dt_compare_job_state,
 315                                                      (opal_dss_print_fn_t)orte_dt_std_print,
 316                                                      OPAL_DSS_UNSTRUCTURED,
 317                                                      "ORTE_JOB_STATE", &tmp))) {
 318         ORTE_ERROR_LOG(rc);
 319         return rc;
 320     }
 321 
 322     tmp = ORTE_EXIT_CODE;
 323     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_exit_code,
 324                                                      orte_dt_unpack_exit_code,
 325                                                      (opal_dss_copy_fn_t)orte_dt_copy_exit_code,
 326                                                      (opal_dss_compare_fn_t)orte_dt_compare_exit_code,
 327                                                      (opal_dss_print_fn_t)orte_dt_std_print,
 328                                                      OPAL_DSS_UNSTRUCTURED,
 329                                                      "ORTE_EXIT_CODE", &tmp))) {
 330         ORTE_ERROR_LOG(rc);
 331         return rc;
 332     }
 333 
 334     tmp = ORTE_JOB_MAP;
 335     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_map,
 336                                                      orte_dt_unpack_map,
 337                                                      (opal_dss_copy_fn_t)orte_dt_copy_map,
 338                                                      (opal_dss_compare_fn_t)orte_dt_compare_map,
 339                                                      (opal_dss_print_fn_t)orte_dt_print_map,
 340                                                      OPAL_DSS_STRUCTURED,
 341                                                      "ORTE_JOB_MAP", &tmp))) {
 342         ORTE_ERROR_LOG(rc);
 343         return rc;
 344     }
 345 
 346     tmp = ORTE_RML_TAG;
 347     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_tag,
 348                                                       orte_dt_unpack_tag,
 349                                                       (opal_dss_copy_fn_t)orte_dt_copy_tag,
 350                                                       (opal_dss_compare_fn_t)orte_dt_compare_tags,
 351                                                       (opal_dss_print_fn_t)orte_dt_std_print,
 352                                                       OPAL_DSS_UNSTRUCTURED,
 353                                                       "ORTE_RML_TAG", &tmp))) {
 354         ORTE_ERROR_LOG(rc);
 355         return rc;
 356     }
 357 
 358     tmp = ORTE_DAEMON_CMD;
 359     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_daemon_cmd,
 360                                                      orte_dt_unpack_daemon_cmd,
 361                                                      (opal_dss_copy_fn_t)orte_dt_copy_daemon_cmd,
 362                                                      (opal_dss_compare_fn_t)orte_dt_compare_daemon_cmd,
 363                                                      (opal_dss_print_fn_t)orte_dt_std_print,
 364                                                      OPAL_DSS_UNSTRUCTURED,
 365                                                      "ORTE_DAEMON_CMD", &tmp))) {
 366         ORTE_ERROR_LOG(rc);
 367         return rc;
 368     }
 369 
 370     tmp = ORTE_IOF_TAG;
 371     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_iof_tag,
 372                                                      orte_dt_unpack_iof_tag,
 373                                                      (opal_dss_copy_fn_t)orte_dt_copy_iof_tag,
 374                                                      (opal_dss_compare_fn_t)orte_dt_compare_iof_tag,
 375                                                      (opal_dss_print_fn_t)orte_dt_std_print,
 376                                                      OPAL_DSS_UNSTRUCTURED,
 377                                                      "ORTE_IOF_TAG", &tmp))) {
 378         ORTE_ERROR_LOG(rc);
 379         return rc;
 380     }
 381 
 382     tmp = ORTE_ATTRIBUTE;
 383     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_attr,
 384                                                      orte_dt_unpack_attr,
 385                                                      (opal_dss_copy_fn_t)orte_dt_copy_attr,
 386                                                      (opal_dss_compare_fn_t)orte_dt_compare_attr,
 387                                                      (opal_dss_print_fn_t)orte_dt_print_attr,
 388                                                      OPAL_DSS_STRUCTURED,
 389                                                      "ORTE_ATTRIBUTE", &tmp))) {
 390         ORTE_ERROR_LOG(rc);
 391         return rc;
 392     }
 393 
 394     tmp = ORTE_SIGNATURE;
 395     if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_sig,
 396                                                      orte_dt_unpack_sig,
 397                                                      (opal_dss_copy_fn_t)orte_dt_copy_sig,
 398                                                      (opal_dss_compare_fn_t)orte_dt_compare_sig,
 399                                                      (opal_dss_print_fn_t)orte_dt_print_sig,
 400                                                      OPAL_DSS_STRUCTURED,
 401                                                      "ORTE_SIGNATURE", &tmp))) {
 402         ORTE_ERROR_LOG(rc);
 403         return rc;
 404     }
 405 
 406     return ORTE_SUCCESS;
 407 }
 408 
 409 orte_job_t* orte_get_job_data_object(orte_jobid_t job)
 410 {
 411     orte_job_t *jdata;
 412 
 413     /* if the job data wasn't setup, we cannot provide the data */
 414     if (NULL == orte_job_data) {
 415         return NULL;
 416     }
 417 
 418     jdata = NULL;
 419     opal_hash_table_get_value_uint32(orte_job_data, job, (void**)&jdata);
 420     return jdata;
 421 }
 422 
 423 orte_proc_t* orte_get_proc_object(orte_process_name_t *proc)
 424 {
 425     orte_job_t *jdata;
 426     orte_proc_t *proct;
 427 
 428     if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
 429         return NULL;
 430     }
 431     proct = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);
 432     return proct;
 433 }
 434 
 435 orte_vpid_t orte_get_proc_daemon_vpid(orte_process_name_t *proc)
 436 {
 437     orte_job_t *jdata;
 438     orte_proc_t *proct;
 439 
 440     if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
 441         return ORTE_VPID_INVALID;
 442     }
 443     if (NULL == (proct = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid))) {
 444         return ORTE_VPID_INVALID;
 445     }
 446     if (NULL == proct->node || NULL == proct->node->daemon) {
 447         return ORTE_VPID_INVALID;
 448     }
 449     return proct->node->daemon->name.vpid;
 450 }
 451 
 452 char* orte_get_proc_hostname(orte_process_name_t *proc)
 453 {
 454     orte_proc_t *proct;
 455     char *hostname = NULL;
 456     int rc;
 457 
 458     /* if we are a tool, then we have no way of obtaining
 459      * this info */
 460     if (ORTE_PROC_IS_TOOL) {
 461         return NULL;
 462     }
 463 
 464     /* don't bother error logging any not-found situations
 465      * as the layer above us will have something to say
 466      * about it */
 467     if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
 468         /* look it up on our arrays */
 469         if (NULL == (proct = orte_get_proc_object(proc))) {
 470             return NULL;
 471         }
 472         if (NULL == proct->node || NULL == proct->node->name) {
 473             return NULL;
 474         }
 475         return proct->node->name;
 476     }
 477 
 478     /* if we are an app, get the data from the modex db */
 479     OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_HOSTNAME,
 480                           (opal_process_name_t*)proc,
 481                           &hostname, OPAL_STRING);
 482 
 483     /* user is responsible for releasing the data */
 484     return hostname;
 485 }
 486 
 487 orte_node_rank_t orte_get_proc_node_rank(orte_process_name_t *proc)
 488 {
 489     orte_proc_t *proct;
 490     orte_node_rank_t *noderank, nd;
 491     int rc;
 492 
 493     if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
 494         /* look it up on our arrays */
 495         if (NULL == (proct = orte_get_proc_object(proc))) {
 496             ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 497             return ORTE_NODE_RANK_INVALID;
 498         }
 499         return proct->node_rank;
 500     }
 501 
 502     /* if we are an app, get the value from the modex db */
 503     noderank = &nd;
 504     OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_NODE_RANK,
 505                           (opal_process_name_t*)proc,
 506                           &noderank, ORTE_NODE_RANK);
 507     if (OPAL_SUCCESS != rc) {
 508         nd = ORTE_NODE_RANK_INVALID;
 509     }
 510     return nd;
 511 }
 512 
 513 orte_vpid_t orte_get_lowest_vpid_alive(orte_jobid_t job)
 514 {
 515     int i;
 516     orte_job_t *jdata;
 517     orte_proc_t *proc;
 518 
 519     if (NULL == (jdata = orte_get_job_data_object(job))) {
 520         return ORTE_VPID_INVALID;
 521     }
 522 
 523     if (ORTE_PROC_IS_DAEMON &&
 524         ORTE_PROC_MY_NAME->jobid == job &&
 525         NULL != orte_process_info.my_hnp_uri) {
 526         /* if we were started by an HNP, then the lowest vpid
 527          * is always 1
 528          */
 529         return 1;
 530     }
 531 
 532     for (i=0; i < jdata->procs->size; i++) {
 533         if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
 534             continue;
 535         }
 536         if (proc->state == ORTE_PROC_STATE_RUNNING) {
 537             /* must be lowest one alive */
 538             return proc->name.vpid;
 539         }
 540     }
 541     /* only get here if no live proc found */
 542     return ORTE_VPID_INVALID;
 543 }
 544 
 545 
 546 /*
 547  * CONSTRUCTORS, DESTRUCTORS, AND CLASS INSTANTIATIONS
 548  * FOR ORTE CLASSES
 549  */
 550 
 551 static void orte_app_context_construct(orte_app_context_t* app_context)
 552 {
 553     app_context->idx=0;
 554     app_context->app=NULL;
 555     app_context->num_procs=0;
 556     OBJ_CONSTRUCT(&app_context->procs, opal_pointer_array_t);
 557     opal_pointer_array_init(&app_context->procs,
 558                             1,
 559                             ORTE_GLOBAL_ARRAY_MAX_SIZE,
 560                             16);
 561     app_context->state = ORTE_APP_STATE_UNDEF;
 562     app_context->first_rank = 0;
 563     app_context->argv=NULL;
 564     app_context->env=NULL;
 565     app_context->cwd=NULL;
 566     app_context->flags = 0;
 567     OBJ_CONSTRUCT(&app_context->attributes, opal_list_t);
 568 }
 569 
 570 static void orte_app_context_destructor(orte_app_context_t* app_context)
 571 {
 572     int i;
 573     orte_proc_t *proc;
 574 
 575     if (NULL != app_context->app) {
 576         free (app_context->app);
 577         app_context->app = NULL;
 578     }
 579 
 580     for (i=0; i < app_context->procs.size; i++) {
 581         if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(&app_context->procs, i))) {
 582             OBJ_RELEASE(proc);
 583         }
 584     }
 585     OBJ_DESTRUCT(&app_context->procs);
 586 
 587     /* argv and env lists created by util/argv copy functions */
 588     if (NULL != app_context->argv) {
 589         opal_argv_free(app_context->argv);
 590         app_context->argv = NULL;
 591     }
 592 
 593     if (NULL != app_context->env) {
 594         opal_argv_free(app_context->env);
 595         app_context->env = NULL;
 596     }
 597 
 598     if (NULL != app_context->cwd) {
 599         free (app_context->cwd);
 600         app_context->cwd = NULL;
 601     }
 602 
 603     OPAL_LIST_DESTRUCT(&app_context->attributes);
 604 }
 605 
 606 OBJ_CLASS_INSTANCE(orte_app_context_t,
 607                    opal_object_t,
 608                    orte_app_context_construct,
 609                    orte_app_context_destructor);
 610 
 611 static void orte_job_construct(orte_job_t* job)
 612 {
 613     job->personality = NULL;
 614     job->jobid = ORTE_JOBID_INVALID;
 615     job->offset = 0;
 616     job->apps = OBJ_NEW(opal_pointer_array_t);
 617     opal_pointer_array_init(job->apps,
 618                             1,
 619                             ORTE_GLOBAL_ARRAY_MAX_SIZE,
 620                             2);
 621     job->num_apps = 0;
 622     job->stdin_target = 0;
 623     job->total_slots_alloc = 0;
 624     job->num_procs = 0;
 625     job->procs = OBJ_NEW(opal_pointer_array_t);
 626     opal_pointer_array_init(job->procs,
 627                             ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
 628                             ORTE_GLOBAL_ARRAY_MAX_SIZE,
 629                             ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
 630     job->map = NULL;
 631     job->bookmark = NULL;
 632     job->bkmark_obj = 0;
 633     job->state = ORTE_JOB_STATE_UNDEF;
 634 
 635     job->num_mapped = 0;
 636     job->num_launched = 0;
 637     job->num_reported = 0;
 638     job->num_terminated = 0;
 639     job->num_daemons_reported = 0;
 640 
 641     job->originator.jobid = ORTE_JOBID_INVALID;
 642     job->originator.vpid = ORTE_VPID_INVALID;
 643     job->num_local_procs = 0;
 644 
 645     job->flags = 0;
 646     ORTE_FLAG_SET(job, ORTE_JOB_FLAG_FORWARD_OUTPUT);
 647 
 648     OBJ_CONSTRUCT(&job->attributes, opal_list_t);
 649     OBJ_CONSTRUCT(&job->launch_msg, opal_buffer_t);
 650 }
 651 
 652 static void orte_job_destruct(orte_job_t* job)
 653 {
 654     orte_proc_t *proc;
 655     orte_app_context_t *app;
 656     int n;
 657     orte_timer_t *evtimer;
 658 
 659     if (NULL == job) {
 660         /* probably just a race condition - just return */
 661         return;
 662     }
 663 
 664     if (orte_debug_flag) {
 665         opal_output(0, "%s Releasing job data for %s",
 666                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job->jobid));
 667     }
 668 
 669     if (NULL != job->personality) {
 670         opal_argv_free(job->personality);
 671     }
 672     for (n=0; n < job->apps->size; n++) {
 673         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(job->apps, n))) {
 674             continue;
 675         }
 676         OBJ_RELEASE(app);
 677     }
 678     OBJ_RELEASE(job->apps);
 679 
 680     /* release any pointers in the attributes */
 681     evtimer = NULL;
 682     if (orte_get_attribute(&job->attributes, ORTE_JOB_FAILURE_TIMER_EVENT,
 683                            (void**)&evtimer, OPAL_PTR)) {
 684         orte_remove_attribute(&job->attributes, ORTE_JOB_FAILURE_TIMER_EVENT);
 685         /* the timer is a pointer to orte_timer_t */
 686         OBJ_RELEASE(evtimer);
 687     }
 688     proc = NULL;
 689     if (orte_get_attribute(&job->attributes, ORTE_JOB_ABORTED_PROC,
 690                            (void**)&proc, OPAL_PTR)) {
 691         orte_remove_attribute(&job->attributes, ORTE_JOB_ABORTED_PROC);
 692         /* points to an orte_proc_t */
 693         OBJ_RELEASE(proc);
 694     }
 695 
 696     if (NULL != job->map) {
 697         OBJ_RELEASE(job->map);
 698         job->map = NULL;
 699     }
 700 
 701     for (n=0; n < job->procs->size; n++) {
 702         if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(job->procs, n))) {
 703             continue;
 704         }
 705         OBJ_RELEASE(proc);
 706     }
 707     OBJ_RELEASE(job->procs);
 708 
 709     /* release the attributes */
 710     OPAL_LIST_DESTRUCT(&job->attributes);
 711 
 712     OBJ_DESTRUCT(&job->launch_msg);
 713 
 714     if (NULL != orte_job_data && ORTE_JOBID_INVALID != job->jobid) {
 715         /* remove the job from the global array */
 716         opal_hash_table_remove_value_uint32(orte_job_data, job->jobid);
 717     }
 718 }
 719 
 720 OBJ_CLASS_INSTANCE(orte_job_t,
 721                    opal_list_item_t,
 722                    orte_job_construct,
 723                    orte_job_destruct);
 724 
 725 
 726 static void orte_node_construct(orte_node_t* node)
 727 {
 728     node->index = -1;
 729     node->name = NULL;
 730     node->daemon = NULL;
 731 
 732     node->num_procs = 0;
 733     node->procs = OBJ_NEW(opal_pointer_array_t);
 734     opal_pointer_array_init(node->procs,
 735                             ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
 736                             ORTE_GLOBAL_ARRAY_MAX_SIZE,
 737                             ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
 738     node->next_node_rank = 0;
 739 
 740     node->state = ORTE_NODE_STATE_UNKNOWN;
 741     node->slots = 0;
 742     node->slots_inuse = 0;
 743     node->slots_max = 0;
 744     node->topology = NULL;
 745 
 746     node->flags = 0;
 747     OBJ_CONSTRUCT(&node->attributes, opal_list_t);
 748 }
 749 
 750 static void orte_node_destruct(orte_node_t* node)
 751 {
 752     int i;
 753     orte_proc_t *proc;
 754 
 755     if (NULL != node->name) {
 756         free(node->name);
 757         node->name = NULL;
 758     }
 759 
 760     if (NULL != node->daemon) {
 761         node->daemon->node = NULL;
 762         OBJ_RELEASE(node->daemon);
 763         node->daemon = NULL;
 764     }
 765 
 766     for (i=0; i < node->procs->size; i++) {
 767         if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
 768             opal_pointer_array_set_item(node->procs, i, NULL);
 769             OBJ_RELEASE(proc);
 770         }
 771     }
 772     OBJ_RELEASE(node->procs);
 773 
 774     /* do NOT destroy the topology */
 775 
 776     /* release the attributes */
 777     OPAL_LIST_DESTRUCT(&node->attributes);
 778 }
 779 
 780 
 781 OBJ_CLASS_INSTANCE(orte_node_t,
 782                    opal_list_item_t,
 783                    orte_node_construct,
 784                    orte_node_destruct);
 785 
 786 
 787 
 788 static void orte_proc_construct(orte_proc_t* proc)
 789 {
 790     proc->name = *ORTE_NAME_INVALID;
 791     proc->pid = 0;
 792     proc->local_rank = ORTE_LOCAL_RANK_INVALID;
 793     proc->node_rank = ORTE_NODE_RANK_INVALID;
 794     proc->app_rank = -1;
 795     proc->last_errmgr_state = ORTE_PROC_STATE_UNDEF;
 796     proc->state = ORTE_PROC_STATE_UNDEF;
 797     proc->app_idx = 0;
 798     proc->node = NULL;
 799     proc->exit_code = 0;      /* Assume we won't fail unless otherwise notified */
 800     proc->rml_uri = NULL;
 801     proc->flags = 0;
 802     OBJ_CONSTRUCT(&proc->attributes, opal_list_t);
 803 }
 804 
 805 static void orte_proc_destruct(orte_proc_t* proc)
 806 {
 807     if (NULL != proc->node) {
 808         OBJ_RELEASE(proc->node);
 809         proc->node = NULL;
 810     }
 811 
 812     if (NULL != proc->rml_uri) {
 813         free(proc->rml_uri);
 814         proc->rml_uri = NULL;
 815     }
 816 
 817     OPAL_LIST_DESTRUCT(&proc->attributes);
 818 }
 819 
 820 OBJ_CLASS_INSTANCE(orte_proc_t,
 821                    opal_list_item_t,
 822                    orte_proc_construct,
 823                    orte_proc_destruct);
 824 
 825 static void orte_job_map_construct(orte_job_map_t* map)
 826 {
 827     map->req_mapper = NULL;
 828     map->last_mapper = NULL;
 829     map->mapping = 0;
 830     map->ranking = 0;
 831     map->binding = 0;
 832     map->ppr = NULL;
 833     map->cpus_per_rank = 0;
 834     map->display_map = false;
 835     map->num_new_daemons = 0;
 836     map->daemon_vpid_start = ORTE_VPID_INVALID;
 837     map->num_nodes = 0;
 838     map->nodes = OBJ_NEW(opal_pointer_array_t);
 839     opal_pointer_array_init(map->nodes,
 840                             ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
 841                             ORTE_GLOBAL_ARRAY_MAX_SIZE,
 842                             ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
 843 }
 844 
 845 static void orte_job_map_destruct(orte_job_map_t* map)
 846 {
 847     orte_std_cntr_t i;
 848     orte_node_t *node;
 849 
 850     if (NULL != map->req_mapper) {
 851         free(map->req_mapper);
 852     }
 853     if (NULL != map->last_mapper) {
 854         free(map->last_mapper);
 855     }
 856     if (NULL != map->ppr) {
 857         free(map->ppr);
 858     }
 859     for (i=0; i < map->nodes->size; i++) {
 860         if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
 861             OBJ_RELEASE(node);
 862             opal_pointer_array_set_item(map->nodes, i, NULL);
 863         }
 864     }
 865     OBJ_RELEASE(map->nodes);
 866 }
 867 
 868 OBJ_CLASS_INSTANCE(orte_job_map_t,
 869                    opal_object_t,
 870                    orte_job_map_construct,
 871                    orte_job_map_destruct);
 872 
 873 static void orte_attr_cons(orte_attribute_t* p)
 874 {
 875     p->key = 0;
 876     p->local = true;  // default to local-only data
 877     memset(&p->data, 0, sizeof(p->data));
 878 }
 879 static void orte_attr_des(orte_attribute_t *p)
 880 {
 881     if (OPAL_BYTE_OBJECT == p->type) {
 882         if (NULL != p->data.bo.bytes) {
 883             free(p->data.bo.bytes);
 884         }
 885     } else if (OPAL_BUFFER == p->type) {
 886         OBJ_DESTRUCT(&p->data.buf);
 887     } else if (OPAL_STRING == p->type) {
 888         free(p->data.string);
 889     }
 890 }
 891 OBJ_CLASS_INSTANCE(orte_attribute_t,
 892                    opal_list_item_t,
 893                    orte_attr_cons, orte_attr_des);
 894 
 895 static void tcon(orte_topology_t *t)
 896 {
 897     t->topo = NULL;
 898     t->sig = NULL;
 899 }
 900 static void tdes(orte_topology_t *t)
 901 {
 902     if (NULL != t->topo) {
 903         opal_hwloc_base_free_topology(t->topo);
 904     }
 905     if (NULL != t->sig) {
 906         free(t->sig);
 907     }
 908 }
 909 OBJ_CLASS_INSTANCE(orte_topology_t,
 910                    opal_object_t,
 911                    tcon, tdes);

/* [<][>][^][v][top][bottom][index][help] */