This source file includes the following definitions.
- orte_dt_init
- orte_get_job_data_object
- orte_get_proc_object
- orte_get_proc_daemon_vpid
- orte_get_proc_hostname
- orte_get_proc_node_rank
- orte_get_lowest_vpid_alive
- orte_app_context_construct
- orte_app_context_destructor
- orte_job_construct
- orte_job_destruct
- orte_node_construct
- orte_node_destruct
- orte_proc_construct
- orte_proc_destruct
- orte_job_map_construct
- orte_job_map_destruct
- orte_attr_cons
- orte_attr_des
- tcon
- tdes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 #include "orte_config.h"
28 #include "orte/constants.h"
29 #include "orte/types.h"
30
31 #ifdef HAVE_SYS_TIME_H
32 #include <sys/time.h>
33 #endif
34
35 #include "opal/mca/hwloc/hwloc-internal.h"
36 #include "opal/mca/pmix/pmix.h"
37 #include "opal/util/argv.h"
38 #include "opal/util/output.h"
39 #include "opal/class/opal_hash_table.h"
40 #include "opal/class/opal_pointer_array.h"
41 #include "opal/class/opal_value_array.h"
42 #include "opal/dss/dss.h"
43 #include "opal/threads/threads.h"
44
45 #include "orte/mca/errmgr/errmgr.h"
46 #include "orte/mca/rml/rml.h"
47 #include "orte/util/proc_info.h"
48 #include "orte/util/name_fns.h"
49
50 #include "orte/runtime/runtime.h"
51 #include "orte/runtime/runtime_internals.h"
52 #include "orte/runtime/orte_globals.h"
53
54
55 #include "orte/runtime/data_type_support/orte_dt_support.h"
56
57
58 opal_list_t orte_job_states = {{0}};
59 opal_list_t orte_proc_states = {{0}};
60
61
62 int orte_clean_output = -1;
63
64
65 bool orte_debug_daemons_file_flag = false;
66 bool orte_leave_session_attached = false;
67 bool orte_do_not_launch = false;
68 bool orted_spin_flag = false;
69 char *orte_local_cpu_type = NULL;
70 char *orte_local_cpu_model = NULL;
71 char *orte_basename = NULL;
72 bool orte_coprocessors_detected = false;
73 opal_hash_table_t *orte_coprocessors = NULL;
74 char *orte_topo_signature = NULL;
75 bool orte_no_vm = false;
76 char *orte_data_server_uri = NULL;
77
78
79 bool orte_static_ports = false;
80 bool orte_standalone_operation = false;
81
82 bool orte_keep_fqdn_hostnames = false;
83 bool orte_have_fqdn_allocation = false;
84 bool orte_show_resolved_nodenames = false;
85 bool orte_retain_aliases = false;
86 int orte_use_hostname_alias = -1;
87 int orte_hostname_cutoff = 1000;
88
89 int orted_debug_failure = -1;
90 int orted_debug_failure_delay = -1;
91 bool orte_never_launched = false;
92 bool orte_devel_level_output = false;
93 bool orte_display_topo_with_map = false;
94 bool orte_display_diffable_output = false;
95
96 char **orte_launch_environ = NULL;
97
98 bool orte_hnp_is_allocated = false;
99 bool orte_allocation_required = false;
100 bool orte_managed_allocation = false;
101 char *orte_set_slots = NULL;
102 bool orte_display_allocation = false;
103 bool orte_display_devel_allocation = false;
104 bool orte_soft_locations = false;
105 int orted_pmi_version = 0;
106 bool orte_nidmap_communicated = false;
107 bool orte_node_info_communicated = false;
108
109
110 char *orte_launch_agent = NULL;
111 char **orted_cmd_line=NULL;
112 char **orte_fork_agent=NULL;
113
114
115 bool orte_debugger_dump_proctable = false;
116 char *orte_debugger_test_daemon = NULL;
117 bool orte_debugger_test_attach = false;
118 int orte_debugger_check_rate = -1;
119
120
121 int orte_exit_status = 0;
122 bool orte_abnormal_term_ordered = false;
123 bool orte_routing_is_enabled = true;
124 bool orte_job_term_ordered = false;
125 bool orte_orteds_term_ordered = false;
126 bool orte_allowed_exit_without_sync = false;
127
128 int orte_startup_timeout = -1;
129 int orte_timeout_usec_per_proc = -1;
130 float orte_max_timeout = -1.0;
131 orte_timer_t *orte_mpiexec_timeout = NULL;
132
133 int orte_stack_trace_wait_timeout = 30;
134
135
136 opal_hash_table_t *orte_job_data = NULL;
137 opal_pointer_array_t *orte_node_pool = NULL;
138 opal_pointer_array_t *orte_node_topologies = NULL;
139 opal_pointer_array_t *orte_local_children = NULL;
140 orte_vpid_t orte_total_procs = 0;
141
142
143 bool orte_tag_output = false;
144 bool orte_timestamp_output = false;
145
146 char *orte_xterm = NULL;
147
148
149 bool orte_report_launch_progress = false;
150
151
152 char *orte_default_hostfile = NULL;
153 bool orte_default_hostfile_given = false;
154 char *orte_rankfile = NULL;
155 int orte_num_allocated_nodes = 0;
156 char *orte_default_dash_host = NULL;
157
158
159 bool orte_report_events = false;
160 char *orte_report_events_uri = NULL;
161
162
163 bool orte_report_bindings = false;
164
165
166 bool orte_do_not_barrier = false;
167
168
169 bool orte_enable_recovery = false;
170 int32_t orte_max_restarts = 0;
171
172
173 bool orte_report_child_jobs_separately = false;
174 struct timeval orte_child_time_to_exit = {0};
175 bool orte_abort_non_zero_exit = false;
176
177
178 int orte_stat_history_size = -1;
179
180
181 char **orte_forwarded_envars = NULL;
182
183
184 bool orte_map_stddiag_to_stderr = false;
185 bool orte_map_stddiag_to_stdout = false;
186
187
188 int orte_max_vm_size = -1;
189
190
191 char *orte_base_user_debugger = NULL;
192
193 int orte_debug_output = -1;
194 bool orte_debug_daemons_flag = false;
195 bool orte_xml_output = false;
196 FILE *orte_xml_fp = NULL;
197 char *orte_job_ident = NULL;
198 bool orte_execute_quiet = false;
199 bool orte_report_silent_errors = false;
200
201
202
203 bool orte_in_parallel_debugger = false;
204
205 char *orte_daemon_cores = NULL;
206
207 int orte_dt_init(void)
208 {
209 int rc;
210 opal_data_type_t tmp;
211
212
213 orte_debug_output = opal_output_open(NULL);
214
215
216 if (orte_debug_flag || 0 < orte_debug_verbosity ||
217 (orte_debug_daemons_flag && (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP))) {
218 if (0 < orte_debug_verbosity) {
219 opal_output_set_verbosity(orte_debug_output, orte_debug_verbosity);
220 } else {
221 opal_output_set_verbosity(orte_debug_output, 1);
222 }
223 }
224
225
226 tmp = ORTE_STD_CNTR;
227 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_std_cntr,
228 orte_dt_unpack_std_cntr,
229 (opal_dss_copy_fn_t)orte_dt_copy_std_cntr,
230 (opal_dss_compare_fn_t)orte_dt_compare_std_cntr,
231 (opal_dss_print_fn_t)orte_dt_std_print,
232 OPAL_DSS_UNSTRUCTURED,
233 "ORTE_STD_CNTR", &tmp))) {
234 ORTE_ERROR_LOG(rc);
235 return rc;
236 }
237
238 tmp = ORTE_JOB;
239 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_job,
240 orte_dt_unpack_job,
241 (opal_dss_copy_fn_t)orte_dt_copy_job,
242 (opal_dss_compare_fn_t)orte_dt_compare_job,
243 (opal_dss_print_fn_t)orte_dt_print_job,
244 OPAL_DSS_STRUCTURED,
245 "ORTE_JOB", &tmp))) {
246 ORTE_ERROR_LOG(rc);
247 return rc;
248 }
249
250 tmp = ORTE_NODE;
251 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_node,
252 orte_dt_unpack_node,
253 (opal_dss_copy_fn_t)orte_dt_copy_node,
254 (opal_dss_compare_fn_t)orte_dt_compare_node,
255 (opal_dss_print_fn_t)orte_dt_print_node,
256 OPAL_DSS_STRUCTURED,
257 "ORTE_NODE", &tmp))) {
258 ORTE_ERROR_LOG(rc);
259 return rc;
260 }
261
262 tmp = ORTE_PROC;
263 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_proc,
264 orte_dt_unpack_proc,
265 (opal_dss_copy_fn_t)orte_dt_copy_proc,
266 (opal_dss_compare_fn_t)orte_dt_compare_proc,
267 (opal_dss_print_fn_t)orte_dt_print_proc,
268 OPAL_DSS_STRUCTURED,
269 "ORTE_PROC", &tmp))) {
270 ORTE_ERROR_LOG(rc);
271 return rc;
272 }
273
274 tmp = ORTE_APP_CONTEXT;
275 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_app_context,
276 orte_dt_unpack_app_context,
277 (opal_dss_copy_fn_t)orte_dt_copy_app_context,
278 (opal_dss_compare_fn_t)orte_dt_compare_app_context,
279 (opal_dss_print_fn_t)orte_dt_print_app_context,
280 OPAL_DSS_STRUCTURED,
281 "ORTE_APP_CONTEXT", &tmp))) {
282 ORTE_ERROR_LOG(rc);
283 return rc;
284 }
285
286 tmp = ORTE_NODE_STATE;
287 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_node_state,
288 orte_dt_unpack_node_state,
289 (opal_dss_copy_fn_t)orte_dt_copy_node_state,
290 (opal_dss_compare_fn_t)orte_dt_compare_node_state,
291 (opal_dss_print_fn_t)orte_dt_std_print,
292 OPAL_DSS_UNSTRUCTURED,
293 "ORTE_NODE_STATE", &tmp))) {
294 ORTE_ERROR_LOG(rc);
295 return rc;
296 }
297
298 tmp = ORTE_PROC_STATE;
299 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_proc_state,
300 orte_dt_unpack_proc_state,
301 (opal_dss_copy_fn_t)orte_dt_copy_proc_state,
302 (opal_dss_compare_fn_t)orte_dt_compare_proc_state,
303 (opal_dss_print_fn_t)orte_dt_std_print,
304 OPAL_DSS_UNSTRUCTURED,
305 "ORTE_PROC_STATE", &tmp))) {
306 ORTE_ERROR_LOG(rc);
307 return rc;
308 }
309
310 tmp = ORTE_JOB_STATE;
311 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_job_state,
312 orte_dt_unpack_job_state,
313 (opal_dss_copy_fn_t)orte_dt_copy_job_state,
314 (opal_dss_compare_fn_t)orte_dt_compare_job_state,
315 (opal_dss_print_fn_t)orte_dt_std_print,
316 OPAL_DSS_UNSTRUCTURED,
317 "ORTE_JOB_STATE", &tmp))) {
318 ORTE_ERROR_LOG(rc);
319 return rc;
320 }
321
322 tmp = ORTE_EXIT_CODE;
323 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_exit_code,
324 orte_dt_unpack_exit_code,
325 (opal_dss_copy_fn_t)orte_dt_copy_exit_code,
326 (opal_dss_compare_fn_t)orte_dt_compare_exit_code,
327 (opal_dss_print_fn_t)orte_dt_std_print,
328 OPAL_DSS_UNSTRUCTURED,
329 "ORTE_EXIT_CODE", &tmp))) {
330 ORTE_ERROR_LOG(rc);
331 return rc;
332 }
333
334 tmp = ORTE_JOB_MAP;
335 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_map,
336 orte_dt_unpack_map,
337 (opal_dss_copy_fn_t)orte_dt_copy_map,
338 (opal_dss_compare_fn_t)orte_dt_compare_map,
339 (opal_dss_print_fn_t)orte_dt_print_map,
340 OPAL_DSS_STRUCTURED,
341 "ORTE_JOB_MAP", &tmp))) {
342 ORTE_ERROR_LOG(rc);
343 return rc;
344 }
345
346 tmp = ORTE_RML_TAG;
347 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_tag,
348 orte_dt_unpack_tag,
349 (opal_dss_copy_fn_t)orte_dt_copy_tag,
350 (opal_dss_compare_fn_t)orte_dt_compare_tags,
351 (opal_dss_print_fn_t)orte_dt_std_print,
352 OPAL_DSS_UNSTRUCTURED,
353 "ORTE_RML_TAG", &tmp))) {
354 ORTE_ERROR_LOG(rc);
355 return rc;
356 }
357
358 tmp = ORTE_DAEMON_CMD;
359 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_daemon_cmd,
360 orte_dt_unpack_daemon_cmd,
361 (opal_dss_copy_fn_t)orte_dt_copy_daemon_cmd,
362 (opal_dss_compare_fn_t)orte_dt_compare_daemon_cmd,
363 (opal_dss_print_fn_t)orte_dt_std_print,
364 OPAL_DSS_UNSTRUCTURED,
365 "ORTE_DAEMON_CMD", &tmp))) {
366 ORTE_ERROR_LOG(rc);
367 return rc;
368 }
369
370 tmp = ORTE_IOF_TAG;
371 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_iof_tag,
372 orte_dt_unpack_iof_tag,
373 (opal_dss_copy_fn_t)orte_dt_copy_iof_tag,
374 (opal_dss_compare_fn_t)orte_dt_compare_iof_tag,
375 (opal_dss_print_fn_t)orte_dt_std_print,
376 OPAL_DSS_UNSTRUCTURED,
377 "ORTE_IOF_TAG", &tmp))) {
378 ORTE_ERROR_LOG(rc);
379 return rc;
380 }
381
382 tmp = ORTE_ATTRIBUTE;
383 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_attr,
384 orte_dt_unpack_attr,
385 (opal_dss_copy_fn_t)orte_dt_copy_attr,
386 (opal_dss_compare_fn_t)orte_dt_compare_attr,
387 (opal_dss_print_fn_t)orte_dt_print_attr,
388 OPAL_DSS_STRUCTURED,
389 "ORTE_ATTRIBUTE", &tmp))) {
390 ORTE_ERROR_LOG(rc);
391 return rc;
392 }
393
394 tmp = ORTE_SIGNATURE;
395 if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_sig,
396 orte_dt_unpack_sig,
397 (opal_dss_copy_fn_t)orte_dt_copy_sig,
398 (opal_dss_compare_fn_t)orte_dt_compare_sig,
399 (opal_dss_print_fn_t)orte_dt_print_sig,
400 OPAL_DSS_STRUCTURED,
401 "ORTE_SIGNATURE", &tmp))) {
402 ORTE_ERROR_LOG(rc);
403 return rc;
404 }
405
406 return ORTE_SUCCESS;
407 }
408
409 orte_job_t* orte_get_job_data_object(orte_jobid_t job)
410 {
411 orte_job_t *jdata;
412
413
414 if (NULL == orte_job_data) {
415 return NULL;
416 }
417
418 jdata = NULL;
419 opal_hash_table_get_value_uint32(orte_job_data, job, (void**)&jdata);
420 return jdata;
421 }
422
423 orte_proc_t* orte_get_proc_object(orte_process_name_t *proc)
424 {
425 orte_job_t *jdata;
426 orte_proc_t *proct;
427
428 if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
429 return NULL;
430 }
431 proct = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);
432 return proct;
433 }
434
435 orte_vpid_t orte_get_proc_daemon_vpid(orte_process_name_t *proc)
436 {
437 orte_job_t *jdata;
438 orte_proc_t *proct;
439
440 if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
441 return ORTE_VPID_INVALID;
442 }
443 if (NULL == (proct = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid))) {
444 return ORTE_VPID_INVALID;
445 }
446 if (NULL == proct->node || NULL == proct->node->daemon) {
447 return ORTE_VPID_INVALID;
448 }
449 return proct->node->daemon->name.vpid;
450 }
451
452 char* orte_get_proc_hostname(orte_process_name_t *proc)
453 {
454 orte_proc_t *proct;
455 char *hostname = NULL;
456 int rc;
457
458
459
460 if (ORTE_PROC_IS_TOOL) {
461 return NULL;
462 }
463
464
465
466
467 if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
468
469 if (NULL == (proct = orte_get_proc_object(proc))) {
470 return NULL;
471 }
472 if (NULL == proct->node || NULL == proct->node->name) {
473 return NULL;
474 }
475 return proct->node->name;
476 }
477
478
479 OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_HOSTNAME,
480 (opal_process_name_t*)proc,
481 &hostname, OPAL_STRING);
482
483
484 return hostname;
485 }
486
487 orte_node_rank_t orte_get_proc_node_rank(orte_process_name_t *proc)
488 {
489 orte_proc_t *proct;
490 orte_node_rank_t *noderank, nd;
491 int rc;
492
493 if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
494
495 if (NULL == (proct = orte_get_proc_object(proc))) {
496 ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
497 return ORTE_NODE_RANK_INVALID;
498 }
499 return proct->node_rank;
500 }
501
502
503 noderank = &nd;
504 OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_NODE_RANK,
505 (opal_process_name_t*)proc,
506 &noderank, ORTE_NODE_RANK);
507 if (OPAL_SUCCESS != rc) {
508 nd = ORTE_NODE_RANK_INVALID;
509 }
510 return nd;
511 }
512
513 orte_vpid_t orte_get_lowest_vpid_alive(orte_jobid_t job)
514 {
515 int i;
516 orte_job_t *jdata;
517 orte_proc_t *proc;
518
519 if (NULL == (jdata = orte_get_job_data_object(job))) {
520 return ORTE_VPID_INVALID;
521 }
522
523 if (ORTE_PROC_IS_DAEMON &&
524 ORTE_PROC_MY_NAME->jobid == job &&
525 NULL != orte_process_info.my_hnp_uri) {
526
527
528
529 return 1;
530 }
531
532 for (i=0; i < jdata->procs->size; i++) {
533 if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
534 continue;
535 }
536 if (proc->state == ORTE_PROC_STATE_RUNNING) {
537
538 return proc->name.vpid;
539 }
540 }
541
542 return ORTE_VPID_INVALID;
543 }
544
545
546
547
548
549
550
551 static void orte_app_context_construct(orte_app_context_t* app_context)
552 {
553 app_context->idx=0;
554 app_context->app=NULL;
555 app_context->num_procs=0;
556 OBJ_CONSTRUCT(&app_context->procs, opal_pointer_array_t);
557 opal_pointer_array_init(&app_context->procs,
558 1,
559 ORTE_GLOBAL_ARRAY_MAX_SIZE,
560 16);
561 app_context->state = ORTE_APP_STATE_UNDEF;
562 app_context->first_rank = 0;
563 app_context->argv=NULL;
564 app_context->env=NULL;
565 app_context->cwd=NULL;
566 app_context->flags = 0;
567 OBJ_CONSTRUCT(&app_context->attributes, opal_list_t);
568 }
569
570 static void orte_app_context_destructor(orte_app_context_t* app_context)
571 {
572 int i;
573 orte_proc_t *proc;
574
575 if (NULL != app_context->app) {
576 free (app_context->app);
577 app_context->app = NULL;
578 }
579
580 for (i=0; i < app_context->procs.size; i++) {
581 if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(&app_context->procs, i))) {
582 OBJ_RELEASE(proc);
583 }
584 }
585 OBJ_DESTRUCT(&app_context->procs);
586
587
588 if (NULL != app_context->argv) {
589 opal_argv_free(app_context->argv);
590 app_context->argv = NULL;
591 }
592
593 if (NULL != app_context->env) {
594 opal_argv_free(app_context->env);
595 app_context->env = NULL;
596 }
597
598 if (NULL != app_context->cwd) {
599 free (app_context->cwd);
600 app_context->cwd = NULL;
601 }
602
603 OPAL_LIST_DESTRUCT(&app_context->attributes);
604 }
605
606 OBJ_CLASS_INSTANCE(orte_app_context_t,
607 opal_object_t,
608 orte_app_context_construct,
609 orte_app_context_destructor);
610
611 static void orte_job_construct(orte_job_t* job)
612 {
613 job->personality = NULL;
614 job->jobid = ORTE_JOBID_INVALID;
615 job->offset = 0;
616 job->apps = OBJ_NEW(opal_pointer_array_t);
617 opal_pointer_array_init(job->apps,
618 1,
619 ORTE_GLOBAL_ARRAY_MAX_SIZE,
620 2);
621 job->num_apps = 0;
622 job->stdin_target = 0;
623 job->total_slots_alloc = 0;
624 job->num_procs = 0;
625 job->procs = OBJ_NEW(opal_pointer_array_t);
626 opal_pointer_array_init(job->procs,
627 ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
628 ORTE_GLOBAL_ARRAY_MAX_SIZE,
629 ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
630 job->map = NULL;
631 job->bookmark = NULL;
632 job->bkmark_obj = 0;
633 job->state = ORTE_JOB_STATE_UNDEF;
634
635 job->num_mapped = 0;
636 job->num_launched = 0;
637 job->num_reported = 0;
638 job->num_terminated = 0;
639 job->num_daemons_reported = 0;
640
641 job->originator.jobid = ORTE_JOBID_INVALID;
642 job->originator.vpid = ORTE_VPID_INVALID;
643 job->num_local_procs = 0;
644
645 job->flags = 0;
646 ORTE_FLAG_SET(job, ORTE_JOB_FLAG_FORWARD_OUTPUT);
647
648 OBJ_CONSTRUCT(&job->attributes, opal_list_t);
649 OBJ_CONSTRUCT(&job->launch_msg, opal_buffer_t);
650 }
651
652 static void orte_job_destruct(orte_job_t* job)
653 {
654 orte_proc_t *proc;
655 orte_app_context_t *app;
656 int n;
657 orte_timer_t *evtimer;
658
659 if (NULL == job) {
660
661 return;
662 }
663
664 if (orte_debug_flag) {
665 opal_output(0, "%s Releasing job data for %s",
666 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job->jobid));
667 }
668
669 if (NULL != job->personality) {
670 opal_argv_free(job->personality);
671 }
672 for (n=0; n < job->apps->size; n++) {
673 if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(job->apps, n))) {
674 continue;
675 }
676 OBJ_RELEASE(app);
677 }
678 OBJ_RELEASE(job->apps);
679
680
681 evtimer = NULL;
682 if (orte_get_attribute(&job->attributes, ORTE_JOB_FAILURE_TIMER_EVENT,
683 (void**)&evtimer, OPAL_PTR)) {
684 orte_remove_attribute(&job->attributes, ORTE_JOB_FAILURE_TIMER_EVENT);
685
686 OBJ_RELEASE(evtimer);
687 }
688 proc = NULL;
689 if (orte_get_attribute(&job->attributes, ORTE_JOB_ABORTED_PROC,
690 (void**)&proc, OPAL_PTR)) {
691 orte_remove_attribute(&job->attributes, ORTE_JOB_ABORTED_PROC);
692
693 OBJ_RELEASE(proc);
694 }
695
696 if (NULL != job->map) {
697 OBJ_RELEASE(job->map);
698 job->map = NULL;
699 }
700
701 for (n=0; n < job->procs->size; n++) {
702 if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(job->procs, n))) {
703 continue;
704 }
705 OBJ_RELEASE(proc);
706 }
707 OBJ_RELEASE(job->procs);
708
709
710 OPAL_LIST_DESTRUCT(&job->attributes);
711
712 OBJ_DESTRUCT(&job->launch_msg);
713
714 if (NULL != orte_job_data && ORTE_JOBID_INVALID != job->jobid) {
715
716 opal_hash_table_remove_value_uint32(orte_job_data, job->jobid);
717 }
718 }
719
720 OBJ_CLASS_INSTANCE(orte_job_t,
721 opal_list_item_t,
722 orte_job_construct,
723 orte_job_destruct);
724
725
726 static void orte_node_construct(orte_node_t* node)
727 {
728 node->index = -1;
729 node->name = NULL;
730 node->daemon = NULL;
731
732 node->num_procs = 0;
733 node->procs = OBJ_NEW(opal_pointer_array_t);
734 opal_pointer_array_init(node->procs,
735 ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
736 ORTE_GLOBAL_ARRAY_MAX_SIZE,
737 ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
738 node->next_node_rank = 0;
739
740 node->state = ORTE_NODE_STATE_UNKNOWN;
741 node->slots = 0;
742 node->slots_inuse = 0;
743 node->slots_max = 0;
744 node->topology = NULL;
745
746 node->flags = 0;
747 OBJ_CONSTRUCT(&node->attributes, opal_list_t);
748 }
749
750 static void orte_node_destruct(orte_node_t* node)
751 {
752 int i;
753 orte_proc_t *proc;
754
755 if (NULL != node->name) {
756 free(node->name);
757 node->name = NULL;
758 }
759
760 if (NULL != node->daemon) {
761 node->daemon->node = NULL;
762 OBJ_RELEASE(node->daemon);
763 node->daemon = NULL;
764 }
765
766 for (i=0; i < node->procs->size; i++) {
767 if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
768 opal_pointer_array_set_item(node->procs, i, NULL);
769 OBJ_RELEASE(proc);
770 }
771 }
772 OBJ_RELEASE(node->procs);
773
774
775
776
777 OPAL_LIST_DESTRUCT(&node->attributes);
778 }
779
780
781 OBJ_CLASS_INSTANCE(orte_node_t,
782 opal_list_item_t,
783 orte_node_construct,
784 orte_node_destruct);
785
786
787
788 static void orte_proc_construct(orte_proc_t* proc)
789 {
790 proc->name = *ORTE_NAME_INVALID;
791 proc->pid = 0;
792 proc->local_rank = ORTE_LOCAL_RANK_INVALID;
793 proc->node_rank = ORTE_NODE_RANK_INVALID;
794 proc->app_rank = -1;
795 proc->last_errmgr_state = ORTE_PROC_STATE_UNDEF;
796 proc->state = ORTE_PROC_STATE_UNDEF;
797 proc->app_idx = 0;
798 proc->node = NULL;
799 proc->exit_code = 0;
800 proc->rml_uri = NULL;
801 proc->flags = 0;
802 OBJ_CONSTRUCT(&proc->attributes, opal_list_t);
803 }
804
805 static void orte_proc_destruct(orte_proc_t* proc)
806 {
807 if (NULL != proc->node) {
808 OBJ_RELEASE(proc->node);
809 proc->node = NULL;
810 }
811
812 if (NULL != proc->rml_uri) {
813 free(proc->rml_uri);
814 proc->rml_uri = NULL;
815 }
816
817 OPAL_LIST_DESTRUCT(&proc->attributes);
818 }
819
820 OBJ_CLASS_INSTANCE(orte_proc_t,
821 opal_list_item_t,
822 orte_proc_construct,
823 orte_proc_destruct);
824
825 static void orte_job_map_construct(orte_job_map_t* map)
826 {
827 map->req_mapper = NULL;
828 map->last_mapper = NULL;
829 map->mapping = 0;
830 map->ranking = 0;
831 map->binding = 0;
832 map->ppr = NULL;
833 map->cpus_per_rank = 0;
834 map->display_map = false;
835 map->num_new_daemons = 0;
836 map->daemon_vpid_start = ORTE_VPID_INVALID;
837 map->num_nodes = 0;
838 map->nodes = OBJ_NEW(opal_pointer_array_t);
839 opal_pointer_array_init(map->nodes,
840 ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
841 ORTE_GLOBAL_ARRAY_MAX_SIZE,
842 ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
843 }
844
845 static void orte_job_map_destruct(orte_job_map_t* map)
846 {
847 orte_std_cntr_t i;
848 orte_node_t *node;
849
850 if (NULL != map->req_mapper) {
851 free(map->req_mapper);
852 }
853 if (NULL != map->last_mapper) {
854 free(map->last_mapper);
855 }
856 if (NULL != map->ppr) {
857 free(map->ppr);
858 }
859 for (i=0; i < map->nodes->size; i++) {
860 if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
861 OBJ_RELEASE(node);
862 opal_pointer_array_set_item(map->nodes, i, NULL);
863 }
864 }
865 OBJ_RELEASE(map->nodes);
866 }
867
868 OBJ_CLASS_INSTANCE(orte_job_map_t,
869 opal_object_t,
870 orte_job_map_construct,
871 orte_job_map_destruct);
872
873 static void orte_attr_cons(orte_attribute_t* p)
874 {
875 p->key = 0;
876 p->local = true;
877 memset(&p->data, 0, sizeof(p->data));
878 }
879 static void orte_attr_des(orte_attribute_t *p)
880 {
881 if (OPAL_BYTE_OBJECT == p->type) {
882 if (NULL != p->data.bo.bytes) {
883 free(p->data.bo.bytes);
884 }
885 } else if (OPAL_BUFFER == p->type) {
886 OBJ_DESTRUCT(&p->data.buf);
887 } else if (OPAL_STRING == p->type) {
888 free(p->data.string);
889 }
890 }
891 OBJ_CLASS_INSTANCE(orte_attribute_t,
892 opal_list_item_t,
893 orte_attr_cons, orte_attr_des);
894
895 static void tcon(orte_topology_t *t)
896 {
897 t->topo = NULL;
898 t->sig = NULL;
899 }
900 static void tdes(orte_topology_t *t)
901 {
902 if (NULL != t->topo) {
903 opal_hwloc_base_free_topology(t->topo);
904 }
905 if (NULL != t->sig) {
906 free(t->sig);
907 }
908 }
909 OBJ_CLASS_INSTANCE(orte_topology_t,
910 opal_object_t,
911 tcon, tdes);