This source file includes the following definitions.
- infocb
- orte_ess_base_tool_setup
- orte_ess_base_tool_finalize
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 #include "orte_config.h"
26 #include "orte/constants.h"
27
28 #include <sys/types.h>
29 #include <stdio.h>
30 #ifdef HAVE_FCNTL_H
31 #include <fcntl.h>
32 #endif
33 #ifdef HAVE_UNISTD_H
34 #include <unistd.h>
35 #endif
36
37 #include "opal/mca/event/event.h"
38 #include "opal/mca/pmix/base/base.h"
39 #include "opal/runtime/opal.h"
40 #include "opal/runtime/opal_progress_threads.h"
41 #include "opal/util/arch.h"
42 #include "opal/util/opal_environ.h"
43 #include "opal/util/argv.h"
44 #include "opal/util/proc.h"
45
46 #include "orte/mca/iof/base/base.h"
47 #include "orte/mca/oob/base/base.h"
48 #include "orte/mca/plm/base/base.h"
49 #include "orte/mca/rml/base/base.h"
50 #include "orte/mca/rml/base/rml_contact.h"
51 #include "orte/mca/routed/base/base.h"
52 #include "orte/mca/errmgr/base/base.h"
53 #include "orte/mca/state/base/base.h"
54 #include "orte/util/proc_info.h"
55 #include "orte/util/session_dir.h"
56 #include "orte/util/show_help.h"
57
58 #include "orte/runtime/orte_globals.h"
59 #include "orte/runtime/orte_wait.h"
60
61 #include "orte/mca/ess/base/base.h"
62
63
64 static void infocb(int status,
65 opal_list_t *info,
66 void *cbdata,
67 opal_pmix_release_cbfunc_t release_fn,
68 void *release_cbdata)
69 {
70 opal_value_t *kv;
71 opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
72
73 if (OPAL_SUCCESS != status) {
74 ORTE_ERROR_LOG(status);
75 } else {
76 kv = (opal_value_t*)opal_list_get_first(info);
77 if (NULL == kv) {
78 ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
79 } else {
80 if (0 == strcmp(kv->key, OPAL_PMIX_SERVER_URI)) {
81 orte_process_info.my_hnp_uri = strdup(kv->data.string);
82 } else {
83 ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
84 }
85 }
86 }
87 if (NULL != release_fn) {
88 release_fn(release_cbdata);
89 }
90 OPAL_PMIX_WAKEUP_THREAD(lock);
91 }
92
93 int orte_ess_base_tool_setup(opal_list_t *flags)
94 {
95 int ret;
96 char *error = NULL;
97 opal_list_t info;
98 opal_value_t *kv, *knext, val;
99 opal_pmix_query_t *q;
100 opal_pmix_lock_t lock;
101 opal_buffer_t *buf;
102
103
104
105 orte_event_base = opal_progress_thread_init("tool");
106
107
108
109 opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
110 if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
111 ORTE_ERROR_LOG(ret);
112 error = "orte_pmix_base_open";
113 goto error;
114 }
115 if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) {
116 ORTE_ERROR_LOG(ret);
117 error = "opal_pmix_base_select";
118 goto error;
119 }
120 if (NULL == opal_pmix.tool_init) {
121
122 orte_show_help("help-ess-base.txt",
123 "legacy-tool", true);
124 ret = ORTE_ERR_SILENT;
125 error = "opal_pmix.tool_init";
126 goto error;
127 }
128
129 opal_pmix_base_set_evbase(orte_event_base);
130
131
132 OBJ_CONSTRUCT(&info, opal_list_t);
133 if (NULL != flags) {
134
135 OPAL_LIST_FOREACH_SAFE(kv, knext, flags, opal_value_t) {
136 opal_list_remove_item(flags, &kv->super);
137 opal_list_append(&info, &kv->super);
138 }
139 }
140 if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(&info))) {
141 ORTE_ERROR_LOG(ret);
142 error = "opal_pmix.init";
143 OPAL_LIST_DESTRUCT(&info);
144 goto error;
145 }
146 OPAL_LIST_DESTRUCT(&info);
147
148 ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
149 ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;
150 orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
151 orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
152 orte_process_info.super.proc_arch = opal_local_arch;
153 opal_proc_local_set(&orte_process_info.super);
154
155 if (NULL != opal_pmix.query) {
156
157 OBJ_CONSTRUCT(&info, opal_list_t);
158 q = OBJ_NEW(opal_pmix_query_t);
159 opal_argv_append_nosize(&q->keys, OPAL_PMIX_SERVER_URI);
160 opal_list_append(&info, &q->super);
161 OPAL_PMIX_CONSTRUCT_LOCK(&lock);
162 opal_pmix.query(&info, infocb, &lock);
163 OPAL_PMIX_WAIT_THREAD(&lock);
164 OPAL_PMIX_DESTRUCT_LOCK(&lock);
165 OPAL_LIST_DESTRUCT(&info);
166 }
167
168
169 if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
170 ORTE_ERROR_LOG(ret);
171 error = "orte_state_base_open";
172 goto error;
173 }
174 if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
175 ORTE_ERROR_LOG(ret);
176 error = "orte_state_base_select";
177 goto error;
178 }
179
180 if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
181 ORTE_ERROR_LOG(ret);
182 error = "orte_errmgr_base_open";
183 goto error;
184 }
185 if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
186 ORTE_ERROR_LOG(ret);
187 error = "orte_errmgr_base_select";
188 goto error;
189 }
190
191
192 if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
193 ORTE_ERROR_LOG(ret);
194 error = "orte_rml_base_open";
195 goto error;
196 }
197 if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
198 ORTE_ERROR_LOG(ret);
199 error = "orte_routed_base_select";
200 goto error;
201 }
202 if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
203 ORTE_ERROR_LOG(ret);
204 error = "orte_oob_base_open";
205 goto error;
206 }
207 if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
208 ORTE_ERROR_LOG(ret);
209 error = "orte_oob_base_select";
210 goto error;
211 }
212
213 if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
214 ORTE_ERROR_LOG(ret);
215 error = "orte_rml_base_open";
216 goto error;
217 }
218 if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
219 ORTE_ERROR_LOG(ret);
220 error = "orte_rml_base_select";
221 goto error;
222 }
223
224
225
226
227
228
229
230 ret = orte_session_setup_base(ORTE_PROC_MY_NAME);
231 if (ORTE_SUCCESS != ret ) {
232 ORTE_ERROR_LOG(ret);
233 error = "define session dir names";
234 goto error;
235 }
236
237
238 if (NULL != orte_process_info.my_hnp_uri && NULL == opal_pmix.server_iof_push) {
239
240 if (ORTE_SUCCESS != orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, ORTE_PROC_MY_HNP, NULL)) {
241 orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
242 exit(1);
243 }
244
245
246
247
248 OBJ_CONSTRUCT(&val, opal_value_t);
249 val.key = OPAL_PMIX_PROC_URI;
250 val.type = OPAL_STRING;
251 val.data.string = orte_process_info.my_hnp_uri;
252 if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_HNP, &val))) {
253 ORTE_ERROR_LOG(ret);
254 val.key = NULL;
255 val.data.string = NULL;
256 OBJ_DESTRUCT(&val);
257 error = "store HNP URI";
258 goto error;
259 }
260 val.key = NULL;
261 val.data.string = NULL;
262 OBJ_DESTRUCT(&val);
263
264 if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
265 orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
266 orte_finalize();
267 exit(1);
268 }
269
270
271 buf = OBJ_NEW(opal_buffer_t);
272 ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP,
273 buf, ORTE_RML_TAG_WARMUP_CONNECTION,
274 orte_rml_send_callback, NULL);
275 if (ORTE_SUCCESS != ret) {
276 ORTE_ERROR_LOG(ret);
277 error = "warmup connection";
278 goto error;
279 }
280
281
282 orte_routed.set_lifeline(ORTE_PROC_MY_HNP);
283
284
285 if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
286 ORTE_ERROR_LOG(ret);
287 error = "orte_iof_base_open";
288 goto error;
289 }
290 if (ORTE_SUCCESS != (ret = orte_iof_base_select())) {
291 ORTE_ERROR_LOG(ret);
292 error = "orte_iof_base_select";
293 goto error;
294 }
295
296 }
297
298 return ORTE_SUCCESS;
299
300 error:
301 orte_show_help("help-orte-runtime.txt",
302 "orte_init:startup:internal-failure",
303 true, error, ORTE_ERROR_NAME(ret), ret);
304
305 return ret;
306 }
307
308 int orte_ess_base_tool_finalize(void)
309 {
310 orte_wait_finalize();
311
312
313
314
315
316 if (NULL != orte_process_info.my_hnp_uri && NULL == opal_pmix.server_iof_push) {
317 (void) mca_base_framework_close(&orte_iof_base_framework);
318 }
319 (void) mca_base_framework_close(&orte_routed_base_framework);
320 (void) mca_base_framework_close(&orte_rml_base_framework);
321 (void) mca_base_framework_close(&orte_errmgr_base_framework);
322
323 opal_pmix.finalize();
324 (void) mca_base_framework_close(&opal_pmix_base_framework);
325
326 return ORTE_SUCCESS;
327 }