This source file includes the following definitions.
- init
- finalize
- delete_route
- update_route
- get_route
- route_lost
- route_is_defined
- set_lifeline
- update_routing_plan
- get_routing_list
- num_routes
- direct_ft_event
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 #include "orte_config.h"
16 #include "orte/constants.h"
17
18 #include "opal/dss/dss.h"
19 #include "opal/util/output.h"
20
21 #include "orte/mca/errmgr/errmgr.h"
22 #include "orte/mca/rml/rml.h"
23 #include "orte/util/name_fns.h"
24 #include "orte/util/proc_info.h"
25 #include "orte/runtime/orte_globals.h"
26 #include "orte/runtime/data_type_support/orte_dt_support.h"
27 #include "orte/runtime/orte_wait.h"
28
29 #include "orte/mca/rml/base/rml_contact.h"
30
31 #include "orte/mca/routed/base/base.h"
32 #include "routed_direct.h"
33
34 static int init(void);
35 static int finalize(void);
36 static int delete_route(orte_process_name_t *proc);
37 static int update_route(orte_process_name_t *target,
38 orte_process_name_t *route);
39 static orte_process_name_t get_route(orte_process_name_t *target);
40 static int route_lost(const orte_process_name_t *route);
41 static bool route_is_defined(const orte_process_name_t *target);
42 static void update_routing_plan(void);
43 static void get_routing_list(opal_list_t *coll);
44 static int set_lifeline(orte_process_name_t *proc);
45 static size_t num_routes(void);
46
47 #if OPAL_ENABLE_FT_CR == 1
48 static int direct_ft_event(int state);
49 #endif
50
51 orte_routed_module_t orte_routed_direct_module = {
52 .initialize = init,
53 .finalize = finalize,
54 .delete_route = delete_route,
55 .update_route = update_route,
56 .get_route = get_route,
57 .route_lost = route_lost,
58 .route_is_defined = route_is_defined,
59 .set_lifeline = set_lifeline,
60 .update_routing_plan = update_routing_plan,
61 .get_routing_list = get_routing_list,
62 .num_routes = num_routes,
63 #if OPAL_ENABLE_FT_CR == 1
64 .ft_event = direct_ft_event
65 #else
66 NULL
67 #endif
68 };
69
70 static orte_process_name_t mylifeline;
71 static orte_process_name_t *lifeline = NULL;
72 static opal_list_t my_children;
73
74 static int init(void)
75 {
76 lifeline = NULL;
77
78 if (ORTE_PROC_IS_DAEMON) {
79 ORTE_PROC_MY_PARENT->jobid = ORTE_PROC_MY_NAME->jobid;
80
81 if (orte_static_ports) {
82
83 lifeline = ORTE_PROC_MY_PARENT;
84 } else {
85
86 lifeline = ORTE_PROC_MY_HNP;
87 ORTE_PROC_MY_PARENT->vpid = 0;
88 }
89 } else if (ORTE_PROC_IS_APP) {
90
91
92 if (NULL == orte_process_info.my_daemon_uri) {
93 return ORTE_ERR_TAKE_NEXT_OPTION;
94 }
95
96 lifeline = ORTE_PROC_MY_DAEMON;
97 orte_routing_is_enabled = true;
98 }
99
100
101 OBJ_CONSTRUCT(&my_children, opal_list_t);
102
103 return ORTE_SUCCESS;
104 }
105
106 static int finalize(void)
107 {
108 OPAL_LIST_DESTRUCT(&my_children);
109 return ORTE_SUCCESS;
110 }
111
112 static int delete_route(orte_process_name_t *proc)
113 {
114 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
115 "%s routed_direct_delete_route for %s",
116 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
117 ORTE_NAME_PRINT(proc)));
118
119
120
121 return ORTE_SUCCESS;
122 }
123
124 static int update_route(orte_process_name_t *target,
125 orte_process_name_t *route)
126 {
127 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
128 "%s routed_direct_update: %s --> %s",
129 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
130 ORTE_NAME_PRINT(target),
131 ORTE_NAME_PRINT(route)));
132
133
134
135 return ORTE_SUCCESS;
136 }
137
138
139 static orte_process_name_t get_route(orte_process_name_t *target)
140 {
141 orte_process_name_t *ret, daemon;
142
143 if (target->jobid == ORTE_JOBID_INVALID ||
144 target->vpid == ORTE_VPID_INVALID) {
145 ret = ORTE_NAME_INVALID;
146 goto found;
147 }
148
149
150 daemon.jobid = ORTE_PROC_MY_DAEMON->jobid;
151 daemon.vpid = ORTE_PROC_MY_DAEMON->vpid;
152
153 if (ORTE_PROC_IS_APP) {
154
155
156
157 if (NULL != orte_process_info.my_daemon_uri) {
158 ret = ORTE_PROC_MY_DAEMON;
159 } else {
160
161
162 ret = target;
163 }
164 goto found;
165 }
166
167
168
169
170 if (ORTE_PROC_IS_TOOL) {
171 if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
172 ret = target;
173 goto found;
174 } else {
175 ORTE_HNP_NAME_FROM_JOB(&daemon, target->jobid);
176 ret = &daemon;
177 goto found;
178 }
179 }
180
181
182 if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
183 OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
184 "%s routing direct to the HNP",
185 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
186 ret = ORTE_PROC_MY_HNP;
187 goto found;
188 }
189
190 daemon.jobid = ORTE_PROC_MY_NAME->jobid;
191
192 if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) {
193 ret = ORTE_NAME_INVALID;
194 goto found;
195 }
196
197
198 if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
199 ret = target;
200 goto found;
201 }
202
203
204 ret = &daemon;
205
206 found:
207 OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
208 "%s routed_direct_get(%s) --> %s",
209 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
210 ORTE_NAME_PRINT(target),
211 ORTE_NAME_PRINT(ret)));
212
213 return *ret;
214 }
215
216 static int route_lost(const orte_process_name_t *route)
217 {
218 opal_list_item_t *item;
219 orte_routed_tree_t *child;
220
221 OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
222 "%s route to %s lost",
223 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
224 ORTE_NAME_PRINT(route)));
225
226
227
228
229
230
231 if (!orte_finalizing &&
232 NULL != lifeline &&
233 OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) {
234 OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
235 "%s routed:direct: Connection to lifeline %s lost",
236 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
237 ORTE_NAME_PRINT(lifeline)));
238 return ORTE_ERR_FATAL;
239 }
240
241
242
243
244 if (ORTE_PROC_IS_HNP &&
245 route->jobid == ORTE_PROC_MY_NAME->jobid) {
246 for (item = opal_list_get_first(&my_children);
247 item != opal_list_get_end(&my_children);
248 item = opal_list_get_next(item)) {
249 child = (orte_routed_tree_t*)item;
250 if (child->vpid == route->vpid) {
251 opal_list_remove_item(&my_children, item);
252 OBJ_RELEASE(item);
253 return ORTE_SUCCESS;
254 }
255 }
256 }
257
258
259 return ORTE_SUCCESS;
260 }
261
262
263 static bool route_is_defined(const orte_process_name_t *target)
264 {
265
266 return true;
267 }
268
269 static int set_lifeline(orte_process_name_t *proc)
270 {
271 OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
272 "%s routed:direct: set lifeline to %s",
273 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
274 ORTE_NAME_PRINT(proc)));
275 mylifeline = *proc;
276 lifeline = &mylifeline;
277 return ORTE_SUCCESS;
278 }
279
280 static void update_routing_plan(void)
281 {
282 orte_routed_tree_t *child;
283 int32_t i;
284 orte_job_t *jdata;
285 orte_proc_t *proc;
286
287 OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
288 "%s routed:direct: update routing plan",
289 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
290
291 if (!ORTE_PROC_IS_HNP) {
292
293 return;
294 }
295
296
297 OPAL_LIST_DESTRUCT(&my_children);
298 OBJ_CONSTRUCT(&my_children, opal_list_t);
299
300
301 if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
302 ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
303 return;
304 }
305 for (i=1; i < jdata->procs->size; i++) {
306 if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
307 continue;
308 }
309 child = OBJ_NEW(orte_routed_tree_t);
310 child->vpid = proc->name.vpid;
311 opal_list_append(&my_children, &child->super);
312 }
313
314 return;
315 }
316
317 static void get_routing_list(opal_list_t *coll)
318 {
319
320 OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
321 "%s routed:direct: get routing list",
322 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
323
324
325
326
327 if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) {
328 return;
329 }
330
331 orte_routed_base_xcast_routing(coll, &my_children);
332 }
333
334 static size_t num_routes(void)
335 {
336 if (!ORTE_PROC_IS_HNP) {
337 return 0;
338 }
339 return opal_list_get_size(&my_children);
340 }
341
#if OPAL_ENABLE_FT_CR == 1
/*
 * Checkpoint/restart notification hook of the direct routed module.
 *
 * state: the checkpoint/restart state being announced.
 *
 * The direct module keeps no state that needs saving or rebuilding, so
 * no state (OPAL_CRS_CHECKPOINT, OPAL_CRS_CONTINUE, OPAL_CRS_TERM or any
 * other value) triggers any action.
 *
 * Always returns ORTE_SUCCESS.
 */
static int direct_ft_event(int state)
{
    (void)state;    /* every state is a no-op for this module */

    return ORTE_SUCCESS;
}
#endif
363 #endif