This source file includes the following definitions.
- init
- finalize
- allocation_complete
- map_complete
- vm_ready
1
2
3
4
5
6
7
8
9
10
11
12 #include "orte_config.h"
13
14 #include <sys/types.h>
15 #ifdef HAVE_UNISTD_H
16 #include <unistd.h>
17 #endif
18 #include <string.h>
19
20 #include "opal/util/output.h"
21
22 #include "orte/mca/errmgr/errmgr.h"
23 #include "orte/mca/iof/iof.h"
24 #include "orte/mca/plm/base/base.h"
25 #include "orte/mca/ras/base/base.h"
26 #include "orte/mca/rmaps/base/base.h"
27 #include "orte/mca/routed/routed.h"
28 #include "orte/util/session_dir.h"
29 #include "orte/util/threads.h"
30 #include "orte/runtime/orte_quit.h"
31
32 #include "orte/mca/state/state.h"
33 #include "orte/mca/state/base/base.h"
34 #include "orte/mca/state/base/state_private.h"
35 #include "state_novm.h"
36
37
38
39
40 static int init(void);
41 static int finalize(void);
42
43
44
45
46
47
48
49 orte_state_base_module_t orte_state_novm_module = {
50 init,
51 finalize,
52 orte_state_base_activate_job_state,
53 orte_state_base_add_job_state,
54 orte_state_base_set_job_state_callback,
55 orte_state_base_set_job_state_priority,
56 orte_state_base_remove_job_state,
57 orte_state_base_activate_proc_state,
58 orte_state_base_add_proc_state,
59 orte_state_base_set_proc_state_callback,
60 orte_state_base_set_proc_state_priority,
61 orte_state_base_remove_proc_state
62 };
63
64 static void allocation_complete(int fd, short args, void *cbdata);
65 static void map_complete(int fd, short args, void *cbdata);
66 static void vm_ready(int fd, short args, void *cbdata);
67
68
69
70
71 static orte_job_state_t launch_states[] = {
72 ORTE_JOB_STATE_INIT,
73 ORTE_JOB_STATE_INIT_COMPLETE,
74 ORTE_JOB_STATE_ALLOCATE,
75 ORTE_JOB_STATE_ALLOCATION_COMPLETE,
76 ORTE_JOB_STATE_DAEMONS_LAUNCHED,
77 ORTE_JOB_STATE_DAEMONS_REPORTED,
78 ORTE_JOB_STATE_VM_READY,
79 ORTE_JOB_STATE_MAP,
80 ORTE_JOB_STATE_MAP_COMPLETE,
81 ORTE_JOB_STATE_SYSTEM_PREP,
82 ORTE_JOB_STATE_LAUNCH_APPS,
83 ORTE_JOB_STATE_SEND_LAUNCH_MSG,
84 ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE,
85 ORTE_JOB_STATE_RUNNING,
86 ORTE_JOB_STATE_REGISTERED,
87
88 ORTE_JOB_STATE_TERMINATED,
89 ORTE_JOB_STATE_NOTIFY_COMPLETED,
90 ORTE_JOB_STATE_ALL_JOBS_COMPLETE,
91 ORTE_JOB_STATE_DAEMONS_TERMINATED
92 };
93 static orte_state_cbfunc_t launch_callbacks[] = {
94 orte_plm_base_setup_job,
95 orte_plm_base_setup_job_complete,
96 orte_ras_base_allocate,
97 allocation_complete,
98 orte_plm_base_daemons_launched,
99 orte_plm_base_daemons_reported,
100 vm_ready,
101 orte_rmaps_base_map_job,
102 map_complete,
103 orte_plm_base_complete_setup,
104 orte_plm_base_launch_apps,
105 orte_plm_base_send_launch_msg,
106 orte_state_base_local_launch_complete,
107 orte_plm_base_post_launch,
108 orte_plm_base_registered,
109 orte_state_base_check_all_complete,
110 orte_state_base_cleanup_job,
111 orte_quit,
112 orte_quit
113 };
114
115 static orte_proc_state_t proc_states[] = {
116 ORTE_PROC_STATE_RUNNING,
117 ORTE_PROC_STATE_REGISTERED,
118 ORTE_PROC_STATE_IOF_COMPLETE,
119 ORTE_PROC_STATE_WAITPID_FIRED,
120 ORTE_PROC_STATE_TERMINATED
121 };
122 static orte_state_cbfunc_t proc_callbacks[] = {
123 orte_state_base_track_procs,
124 orte_state_base_track_procs,
125 orte_state_base_track_procs,
126 orte_state_base_track_procs,
127 orte_state_base_track_procs
128 };
129
130
131
132
133 static int init(void)
134 {
135 int i, rc;
136 int num_states;
137
138
139 OBJ_CONSTRUCT(&orte_job_states, opal_list_t);
140 OBJ_CONSTRUCT(&orte_proc_states, opal_list_t);
141
142
143 num_states = sizeof(launch_states) / sizeof(orte_job_state_t);
144 for (i=0; i < num_states; i++) {
145 if (ORTE_SUCCESS != (rc = orte_state.add_job_state(launch_states[i],
146 launch_callbacks[i],
147 ORTE_SYS_PRI))) {
148 ORTE_ERROR_LOG(rc);
149 }
150 }
151
152 if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_FORCED_EXIT,
153 orte_quit, ORTE_ERROR_PRI))) {
154 ORTE_ERROR_LOG(rc);
155 }
156
157 if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_REPORT_PROGRESS,
158 orte_state_base_report_progress, ORTE_ERROR_PRI))) {
159 ORTE_ERROR_LOG(rc);
160 }
161 if (5 < opal_output_get_verbosity(orte_state_base_framework.framework_output)) {
162 orte_state_base_print_job_state_machine();
163 }
164
165
166
167
168 num_states = sizeof(proc_states) / sizeof(orte_proc_state_t);
169 for (i=0; i < num_states; i++) {
170 if (ORTE_SUCCESS != (rc = orte_state.add_proc_state(proc_states[i],
171 proc_callbacks[i],
172 ORTE_SYS_PRI))) {
173 ORTE_ERROR_LOG(rc);
174 }
175 }
176 if (5 < opal_output_get_verbosity(orte_state_base_framework.framework_output)) {
177 orte_state_base_print_proc_state_machine();
178 }
179
180 return ORTE_SUCCESS;
181 }
182
183 static int finalize(void)
184 {
185 opal_list_item_t *item;
186
187
188 while (NULL != (item = opal_list_remove_first(&orte_proc_states))) {
189 OBJ_RELEASE(item);
190 }
191 OBJ_DESTRUCT(&orte_proc_states);
192
193 return ORTE_SUCCESS;
194 }
195
196
197
198
199 static void allocation_complete(int fd, short args, void *cbdata)
200 {
201 orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
202 orte_job_t *jdata;
203 orte_job_t *daemons;
204 orte_topology_t *t;
205 orte_node_t *node;
206 int i;
207
208 ORTE_ACQUIRE_OBJECT(caddy);
209 jdata = state->jdata;
210
211 jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
212
213
214 if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
215 ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
216 ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
217 goto done;
218 }
219
220 orte_set_attribute(&daemons->attributes, ORTE_JOB_NO_VM, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
221
222
223
224
225 t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0);
226 for (i=1; i < orte_node_pool->size; i++) {
227 if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
228 continue;
229 }
230 node->topology = t;
231 }
232 if (!orte_managed_allocation) {
233 if (NULL != orte_set_slots &&
234 0 != strncmp(orte_set_slots, "none", strlen(orte_set_slots))) {
235 for (i=0; i < orte_node_pool->size; i++) {
236 if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
237 continue;
238 }
239 if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
240 OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
241 "%s plm:base:setting slots for node %s by %s",
242 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, orte_set_slots));
243 orte_plm_base_set_slots(node);
244 }
245 }
246 }
247 }
248
249
250 ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
251
252 done:
253
254 OBJ_RELEASE(state);
255 }
256
257
258 static void map_complete(int fd, short args, void *cbdata)
259 {
260 orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
261 orte_job_t *jdata;
262
263 ORTE_ACQUIRE_OBJECT(caddy);
264 jdata = state->jdata;
265
266 jdata->state = ORTE_JOB_STATE_MAP_COMPLETE;
267
268 ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
269
270
271 OBJ_RELEASE(state);
272 }
273
274 static void vm_ready(int fd, short args, void *cbdata)
275 {
276 orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
277 orte_job_t *jdata;
278
279 ORTE_ACQUIRE_OBJECT(caddy);
280 jdata = state->jdata;
281
282
283
284
285 jdata->state = ORTE_JOB_STATE_VM_READY;
286 ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SYSTEM_PREP);
287
288 OBJ_RELEASE(state);
289 }