This source file includes the following definitions:
- opal_err2str
- opal_init_psm
- opal_init_error
- opal_init_util
- opal_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36 #ifdef HAVE_UNISTD_H
37 #include <unistd.h>
38 #endif
39
40 #include "opal_config.h"
41
42 #include "opal/util/malloc.h"
43 #include "opal/util/arch.h"
44 #include "opal/util/output.h"
45 #include "opal/util/show_help.h"
46 #include "opal/util/proc.h"
47 #include "opal/memoryhooks/memory.h"
48 #include "opal/mca/base/base.h"
49 #include "opal/mca/base/mca_base_var.h"
50 #include "opal/runtime/opal.h"
51 #include "opal/util/net.h"
52 #include "opal/datatype/opal_datatype.h"
53 #include "opal/mca/installdirs/base/base.h"
54 #include "opal/mca/memory/base/base.h"
55 #include "opal/mca/patcher/base/base.h"
56 #include "opal/mca/memcpy/base/base.h"
57 #include "opal/mca/hwloc/base/base.h"
58 #include "opal/mca/reachable/base/base.h"
59 #include "opal/mca/timer/base/base.h"
60 #include "opal/mca/memchecker/base/base.h"
61 #include "opal/mca/if/base/base.h"
62 #include "opal/dss/dss.h"
63 #include "opal/mca/shmem/base/base.h"
64 #include "opal/mca/compress/base/base.h"
65 #include "opal/threads/threads.h"
66 #include "opal/threads/tsd.h"
67
68 #include "opal/runtime/opal_cr.h"
69 #include "opal/mca/crs/base/base.h"
70
71 #include "opal/runtime/opal_progress.h"
72 #include "opal/mca/event/base/base.h"
73 #include "opal/mca/backtrace/base/base.h"
74
75 #include "opal/constants.h"
76 #include "opal/util/error.h"
77 #include "opal/util/stacktrace.h"
78 #include "opal/util/keyval_parse.h"
79 #include "opal/util/sys_limits.h"
80 #include "opal/util/timings.h"
81
82 #if OPAL_CC_USE_PRAGMA_IDENT
83 #pragma ident OPAL_IDENT_STRING
84 #elif OPAL_CC_USE_IDENT
85 #ident OPAL_IDENT_STRING
86 #endif
87 const char opal_version_string[] = OPAL_IDENT_STRING;
88
89 int opal_initialized = 0;
90 bool opal_init_called = false;
91 int opal_util_initialized = 0;
92
93
94
95 int opal_cache_line_size = 128;
96 bool opal_warn_on_fork = true;
97
98 static int
99 opal_err2str(int errnum, const char **errmsg)
100 {
101 const char *retval;
102
103 switch (errnum) {
104 case OPAL_SUCCESS:
105 retval = "Success";
106 break;
107 case OPAL_ERROR:
108 retval = "Error";
109 break;
110 case OPAL_ERR_OUT_OF_RESOURCE:
111 retval = "Out of resource";
112 break;
113 case OPAL_ERR_TEMP_OUT_OF_RESOURCE:
114 retval = "Temporarily out of resource";
115 break;
116 case OPAL_ERR_RESOURCE_BUSY:
117 retval = "Resource busy";
118 break;
119 case OPAL_ERR_BAD_PARAM:
120 retval = "Bad parameter";
121 break;
122 case OPAL_ERR_FATAL:
123 retval = "Fatal";
124 break;
125 case OPAL_ERR_NOT_IMPLEMENTED:
126 retval = "Not implemented";
127 break;
128 case OPAL_ERR_NOT_SUPPORTED:
129 retval = "Not supported";
130 break;
131 case OPAL_ERR_INTERRUPTED:
132 retval = "Interrupted";
133 break;
134 case OPAL_ERR_WOULD_BLOCK:
135 retval = "Would block";
136 break;
137 case OPAL_ERR_IN_ERRNO:
138 retval = "In errno";
139 break;
140 case OPAL_ERR_UNREACH:
141 retval = "Unreachable";
142 break;
143 case OPAL_ERR_NOT_FOUND:
144 retval = "Not found";
145 break;
146 case OPAL_EXISTS:
147 retval = "Exists";
148 break;
149 case OPAL_ERR_TIMEOUT:
150 retval = "Timeout";
151 break;
152 case OPAL_ERR_NOT_AVAILABLE:
153 retval = "Not available";
154 break;
155 case OPAL_ERR_PERM:
156 retval = "No permission";
157 break;
158 case OPAL_ERR_VALUE_OUT_OF_BOUNDS:
159 retval = "Value out of bounds";
160 break;
161 case OPAL_ERR_FILE_READ_FAILURE:
162 retval = "File read failure";
163 break;
164 case OPAL_ERR_FILE_WRITE_FAILURE:
165 retval = "File write failure";
166 break;
167 case OPAL_ERR_FILE_OPEN_FAILURE:
168 retval = "File open failure";
169 break;
170 case OPAL_ERR_PACK_MISMATCH:
171 retval = "Pack data mismatch";
172 break;
173 case OPAL_ERR_PACK_FAILURE:
174 retval = "Data pack failed";
175 break;
176 case OPAL_ERR_UNPACK_FAILURE:
177 retval = "Data unpack failed";
178 break;
179 case OPAL_ERR_UNPACK_INADEQUATE_SPACE:
180 retval = "Data unpack had inadequate space";
181 break;
182 case OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER:
183 retval = "Data unpack would read past end of buffer";
184 break;
185 case OPAL_ERR_OPERATION_UNSUPPORTED:
186 retval = "Requested operation is not supported on referenced data type";
187 break;
188 case OPAL_ERR_UNKNOWN_DATA_TYPE:
189 retval = "Unknown data type";
190 break;
191 case OPAL_ERR_BUFFER:
192 retval = "Buffer type (described vs non-described) mismatch - operation not allowed";
193 break;
194 case OPAL_ERR_DATA_TYPE_REDEF:
195 retval = "Attempt to redefine an existing data type";
196 break;
197 case OPAL_ERR_DATA_OVERWRITE_ATTEMPT:
198 retval = "Attempt to overwrite a data value";
199 break;
200 case OPAL_ERR_MODULE_NOT_FOUND:
201 retval = "Framework requires at least one active module, but none found";
202 break;
203 case OPAL_ERR_TOPO_SLOT_LIST_NOT_SUPPORTED:
204 retval = "OS topology does not support slot_list process affinity";
205 break;
206 case OPAL_ERR_TOPO_SOCKET_NOT_SUPPORTED:
207 retval = "Could not obtain socket topology information";
208 break;
209 case OPAL_ERR_TOPO_CORE_NOT_SUPPORTED:
210 retval = "Could not obtain core topology information";
211 break;
212 case OPAL_ERR_NOT_ENOUGH_SOCKETS:
213 retval = "Not enough sockets to meet request";
214 break;
215 case OPAL_ERR_NOT_ENOUGH_CORES:
216 retval = "Not enough cores to meet request";
217 break;
218 case OPAL_ERR_INVALID_PHYS_CPU:
219 retval = "Invalid physical cpu number returned";
220 break;
221 case OPAL_ERR_MULTIPLE_AFFINITIES:
222 retval = "Multiple methods for assigning process affinity were specified";
223 break;
224 case OPAL_ERR_SLOT_LIST_RANGE:
225 retval = "Provided slot_list range is invalid";
226 break;
227 case OPAL_ERR_NETWORK_NOT_PARSEABLE:
228 retval = "Provided network specification is not parseable";
229 break;
230 case OPAL_ERR_SILENT:
231 retval = NULL;
232 break;
233 case OPAL_ERR_NOT_INITIALIZED:
234 retval = "Not initialized";
235 break;
236 case OPAL_ERR_NOT_BOUND:
237 retval = "Not bound";
238 break;
239 case OPAL_ERR_TAKE_NEXT_OPTION:
240 retval = "Take next option";
241 break;
242 case OPAL_ERR_PROC_ENTRY_NOT_FOUND:
243 retval = "Database entry not found";
244 break;
245 case OPAL_ERR_DATA_VALUE_NOT_FOUND:
246 retval = "Data for specified key not found";
247 break;
248 case OPAL_ERR_CONNECTION_FAILED:
249 retval = "Connection failed";
250 break;
251 case OPAL_ERR_AUTHENTICATION_FAILED:
252 retval = "Authentication failed";
253 break;
254 case OPAL_ERR_COMM_FAILURE:
255 retval = "Comm failure";
256 break;
257 case OPAL_ERR_SERVER_NOT_AVAIL:
258 retval = "Server not available";
259 break;
260 case OPAL_ERR_IN_PROCESS:
261 retval = "Operation in process";
262 break;
263 case OPAL_ERR_DEBUGGER_RELEASE:
264 retval = "Release debugger";
265 break;
266 case OPAL_ERR_HANDLERS_COMPLETE:
267 retval = "Event handlers complete";
268 break;
269 case OPAL_ERR_PARTIAL_SUCCESS:
270 retval = "Partial success";
271 break;
272 case OPAL_ERR_PROC_ABORTED:
273 retval = "Process abnormally terminated";
274 break;
275 case OPAL_ERR_PROC_REQUESTED_ABORT:
276 retval = "Process requested abort";
277 break;
278 case OPAL_ERR_PROC_ABORTING:
279 retval = "Process is aborting";
280 break;
281 case OPAL_ERR_NODE_DOWN:
282 retval = "Node has gone down";
283 break;
284 case OPAL_ERR_NODE_OFFLINE:
285 retval = "Node has gone offline";
286 break;
287 case OPAL_ERR_JOB_TERMINATED:
288 retval = "Job terminated";
289 break;
290 case OPAL_ERR_PROC_RESTART:
291 retval = "Process restarted";
292 break;
293 case OPAL_ERR_PROC_CHECKPOINT:
294 retval = "Process checkpoint";
295 break;
296 case OPAL_ERR_PROC_MIGRATE:
297 retval = "Process migrate";
298 break;
299 case OPAL_ERR_EVENT_REGISTRATION:
300 retval = "Event registration";
301 break;
302 case OPAL_ERR_HEARTBEAT_ALERT:
303 retval = "Heartbeat not received";
304 break;
305 case OPAL_ERR_FILE_ALERT:
306 retval = "File alert - proc may have stalled";
307 break;
308 case OPAL_ERR_MODEL_DECLARED:
309 retval = "Model declared";
310 break;
311 case OPAL_PMIX_LAUNCH_DIRECTIVE:
312 retval = "Launch directive";
313 break;
314
315 default:
316 retval = "UNRECOGNIZED";
317 }
318
319 *errmsg = retval;
320 return OPAL_SUCCESS;
321 }
322
323
324 int opal_init_psm(void)
325 {
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341 if (NULL == getenv("IPATH_NO_BACKTRACE")) {
342 opal_setenv("IPATH_NO_BACKTRACE", "1", true, &environ);
343 }
344 if (NULL == getenv("HFI_NO_BACKTRACE")) {
345 opal_setenv("HFI_NO_BACKTRACE", "1", true, &environ);
346 }
347
348 return OPAL_SUCCESS;
349 }
350
351 static int opal_init_error (const char *error, int ret)
352 {
353 if (OPAL_ERR_SILENT != ret) {
354 opal_show_help( "help-opal-runtime.txt",
355 "opal_init:startup:internal-failure", true,
356 error, ret );
357 }
358 return ret;
359 }
360
361 static mca_base_framework_t *opal_init_util_frameworks[] = {
362 &opal_installdirs_base_framework, &opal_if_base_framework, NULL,
363 };
364
365 int
366 opal_init_util(int* pargc, char*** pargv)
367 {
368 int ret;
369 char *error = NULL;
370 char hostname[OPAL_MAXHOSTNAMELEN];
371 OPAL_TIMING_ENV_INIT(otmng);
372
373 if( ++opal_util_initialized != 1 ) {
374 if( opal_util_initialized < 1 ) {
375 return OPAL_ERROR;
376 }
377 return OPAL_SUCCESS;
378 }
379
380
381 OBJ_CONSTRUCT(&opal_init_util_domain, opal_finalize_domain_t);
382 (void) opal_finalize_domain_init (&opal_init_util_domain, "opal_init_util");
383 opal_finalize_set_domain (&opal_init_util_domain);
384
385 opal_thread_set_main();
386
387 opal_init_called = true;
388
389
390
391
392
393 gethostname(hostname, sizeof(hostname));
394 opal_process_info.nodename = strdup(hostname);
395
396
397 opal_malloc_init();
398
399 OPAL_TIMING_ENV_NEXT(otmng, "opal_malloc_init");
400
401
402 opal_output_init();
403
404
405 if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_installdirs_base_framework, 0))) {
406 fprintf(stderr, "opal_installdirs_base_open() failed -- process will likely abort (%s:%d, returned %d instead of OPAL_SUCCESS)\n",
407 __FILE__, __LINE__, ret);
408 return ret;
409 }
410
411
412 opal_show_help_init();
413
414 OPAL_TIMING_ENV_NEXT(otmng, "opal_show_help_init");
415
416
417 if (OPAL_SUCCESS !=
418 (ret = opal_error_register("OPAL",
419 OPAL_ERR_BASE, OPAL_ERR_MAX, opal_err2str))) {
420 return opal_init_error ("opal_error_register", ret);
421 }
422
423
424 if (OPAL_SUCCESS != (ret = opal_util_keyval_parse_init())) {
425 return opal_init_error ("opal_util_keyval_parse_init", ret);
426 }
427
428
429
430 opal_init_psm();
431
432 OPAL_TIMING_ENV_NEXT(otmng, "opal_init_psm");
433
434
435 if (OPAL_SUCCESS != (ret = mca_base_var_init())) {
436 return opal_init_error ("mca_base_var_init", ret);
437 }
438 OPAL_TIMING_ENV_NEXT(otmng, "opal_var_init");
439
440
441 if (OPAL_SUCCESS != (ret = mca_base_var_cache_files(false))) {
442 return opal_init_error ("failed to cache files", ret);
443 }
444
445 OPAL_TIMING_ENV_NEXT(otmng, "opal_var_cache");
446
447
448
449 if (OPAL_SUCCESS != (ret = opal_register_params())) {
450 return opal_init_error ("opal_register_params", ret);
451 }
452
453 if (OPAL_SUCCESS != (ret = opal_net_init())) {
454 return opal_init_error ("opal_net_init", ret);
455 }
456
457 OPAL_TIMING_ENV_NEXT(otmng, "opal_net_init");
458
459
460 if (OPAL_SUCCESS != (ret = opal_util_register_stackhandlers())) {
461 return opal_init_error ("opal_util_register_stackhandlers", ret);
462 }
463
464
465
466
467 if (OPAL_SUCCESS != (ret = opal_util_init_sys_limits(&error))) {
468 opal_show_help("help-opal-runtime.txt",
469 "opal_init:syslimit", false,
470 error);
471 return OPAL_ERR_SILENT;
472 }
473
474
475 if (OPAL_SUCCESS != (ret = opal_arch_init ())) {
476 return opal_init_error ("opal_arch_init", ret);
477 }
478
479 OPAL_TIMING_ENV_NEXT(otmng, "opal_arch_init");
480
481
482 if (OPAL_SUCCESS != (ret = opal_datatype_init ())) {
483 return opal_init_error ("opal_datatype_init", ret);
484 }
485
486 OPAL_TIMING_ENV_NEXT(otmng, "opal_datatype_init");
487
488
489 if (OPAL_SUCCESS != (ret = opal_dss_open())) {
490 return opal_init_error ("opal_dss_open", ret);
491 }
492
493 OPAL_TIMING_ENV_NEXT(otmng, "opal_dss_open");
494
495
496 if (OPAL_SUCCESS != (ret = mca_base_open())) {
497 return opal_init_error ("mca_base_open", ret);
498 }
499
500 OPAL_TIMING_ENV_NEXT(otmng, "mca_base_open");
501
502
503 if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_if_base_framework, 0))) {
504 fprintf(stderr, "opal_if_base_open() failed -- process will likely abort (%s:%d, returned %d instead of OPAL_SUCCESS)\n",
505 __FILE__, __LINE__, ret);
506 return ret;
507 }
508
509
510 opal_finalize_register_cleanup_arg (mca_base_framework_close_list, opal_init_util_frameworks);
511
512 OPAL_TIMING_ENV_NEXT(otmng, "opal_if_init");
513
514 return OPAL_SUCCESS;
515 }
516
517
518
519
520
521
522 static mca_base_framework_t *opal_init_frameworks[] = {
523 &opal_hwloc_base_framework, &opal_memcpy_base_framework, &opal_memchecker_base_framework,
524 &opal_backtrace_base_framework, &opal_timer_base_framework, &opal_event_base_framework,
525 &opal_shmem_base_framework, &opal_reachable_base_framework, &opal_compress_base_framework,
526 NULL,
527 };
528
529 int
530 opal_init(int* pargc, char*** pargv)
531 {
532 int ret;
533
534 if( ++opal_initialized != 1 ) {
535 if( opal_initialized < 1 ) {
536 return OPAL_ERROR;
537 }
538 return OPAL_SUCCESS;
539 }
540
541
542 if (OPAL_SUCCESS != (ret = opal_init_util(pargc, pargv))) {
543 return ret;
544 }
545
546 OBJ_CONSTRUCT(&opal_init_domain, opal_finalize_domain_t);
547 (void) opal_finalize_domain_init (&opal_init_domain, "opal_init");
548 opal_finalize_set_domain (&opal_init_domain);
549
550 opal_finalize_register_cleanup_arg (mca_base_framework_close_list, opal_init_frameworks);
551 opal_finalize_register_cleanup (opal_tsd_keys_destruct);
552
553 ret = mca_base_framework_open_list (opal_init_frameworks, 0);
554 if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
555 return opal_init_error ("opal_init framework open", ret);
556 }
557
558
559 if (OPAL_SUCCESS != (ret = opal_mem_hooks_init())) {
560 return opal_init_error ("opal_mem_hooks_init", ret);
561 }
562
563
564 if (OPAL_SUCCESS != (ret = opal_memchecker_base_select())) {
565 return opal_init_error ("opal_memchecker_base_select", ret);
566 }
567
568
569
570
571 if (OPAL_SUCCESS != (ret = opal_progress_init())) {
572 return opal_init_error ("opal_progress_init", ret);
573 }
574
575 opal_progress_event_users_increment();
576
577
578 if (OPAL_SUCCESS != (ret = opal_shmem_base_select())) {
579 return opal_init_error ("opal_shmem_base_select", ret);
580 }
581
582
583 if (OPAL_SUCCESS != (ret = opal_reachable_base_select())) {
584 return opal_init_error ("opal_reachable_base_select", ret);
585 }
586
587
588 if (OPAL_SUCCESS != (ret = opal_compress_base_select())) {
589 return opal_init_error ("opal_compress_base_select", ret);
590 }
591
592 return OPAL_SUCCESS;
593 }