This source file includes the following definitions.
- orte_snapc_full_orted_construct
- orte_snapc_full_orted_destruct
- orte_snapc_full_app_construct
- orte_snapc_full_app_destruct
- orte_snapc_full_component_query
- orte_snapc_full_module_init
- orte_snapc_full_module_finalize
- orte_snapc_full_setup_job
- orte_snapc_full_release_job
- orte_snapc_full_ft_event
- orte_snapc_full_start_ckpt
- orte_snapc_full_end_ckpt
- orte_snapc_full_request_op
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 #include "orte_config.h"
18
19 #include <sys/types.h>
20 #ifdef HAVE_UNISTD_H
21 #include <unistd.h>
22 #endif
23
24 #include "orte/mca/mca.h"
25 #include "opal/mca/base/base.h"
26
27 #include "opal/util/output.h"
28 #include "opal/util/opal_environ.h"
29
30 #include "orte/mca/snapc/snapc.h"
31 #include "orte/mca/snapc/base/base.h"
32
33 #include "snapc_full.h"
34
35
36
37
/*
 * Function table for this module. orte_snapc_full_component_query() hands
 * out a pointer to it. The entries are positional, so their order has to
 * match the member order of orte_snapc_base_module_t.
 */
static orte_snapc_base_module_t loc_module = {
    /* Module initialization */
    orte_snapc_full_module_init,
    /* Module finalization */
    orte_snapc_full_module_finalize,
    orte_snapc_full_setup_job,
    orte_snapc_full_release_job,
    orte_snapc_full_ft_event,
    orte_snapc_full_start_ckpt,
    orte_snapc_full_end_ckpt,
    orte_snapc_full_request_op
};
50
51
52
53
/*
 * Constructor/destructor prototypes for orte_snapc_full_orted_snapshot_t,
 * declared ahead of the class instance below that references them.
 */
void orte_snapc_full_orted_construct(orte_snapc_full_orted_snapshot_t *obj);
void orte_snapc_full_orted_destruct( orte_snapc_full_orted_snapshot_t *obj);

/*
 * Class instance for orte_snapc_full_orted_snapshot_t, listed together with
 * orte_snapc_base_global_snapshot_t (presumably the parent class -- confirm
 * against the OBJ_CLASS_INSTANCE definition) and the two functions above.
 */
OBJ_CLASS_INSTANCE(orte_snapc_full_orted_snapshot_t,
                   orte_snapc_base_global_snapshot_t,
                   orte_snapc_full_orted_construct,
                   orte_snapc_full_orted_destruct);
61
62
63
64
/*
 * Constructor/destructor prototypes for orte_snapc_full_app_snapshot_t,
 * declared ahead of the class instance below that references them.
 */
void orte_snapc_full_app_construct(orte_snapc_full_app_snapshot_t *obj);
void orte_snapc_full_app_destruct( orte_snapc_full_app_snapshot_t *obj);

/*
 * Class instance for orte_snapc_full_app_snapshot_t, listed together with
 * orte_snapc_base_local_snapshot_t (presumably the parent class -- confirm
 * against the OBJ_CLASS_INSTANCE definition) and the two functions above.
 */
OBJ_CLASS_INSTANCE(orte_snapc_full_app_snapshot_t,
                   orte_snapc_base_local_snapshot_t,
                   orte_snapc_full_app_construct,
                   orte_snapc_full_app_destruct);
72
73
74
75
76
77
78
79
80
81 void orte_snapc_full_orted_construct(orte_snapc_full_orted_snapshot_t *snapshot) {
82 snapshot->process_name.jobid = 0;
83 snapshot->process_name.vpid = 0;
84
85 snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE;
86 }
87
88 void orte_snapc_full_orted_destruct( orte_snapc_full_orted_snapshot_t *snapshot) {
89 snapshot->process_name.jobid = 0;
90 snapshot->process_name.vpid = 0;
91
92 snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE;
93 }
94
95 void orte_snapc_full_app_construct(orte_snapc_full_app_snapshot_t *app_snapshot) {
96 app_snapshot->comm_pipe_r = NULL;
97 app_snapshot->comm_pipe_w = NULL;
98
99 app_snapshot->comm_pipe_r_fd = -1;
100 app_snapshot->comm_pipe_w_fd = -1;
101
102 app_snapshot->is_eh_active = false;
103 app_snapshot->unique_pipe_id = 0;
104
105 app_snapshot->process_pid = 0;
106
107 app_snapshot->migrating = false;
108
109 app_snapshot->finished = false;
110 }
111
112 void orte_snapc_full_app_destruct( orte_snapc_full_app_snapshot_t *app_snapshot) {
113 if( NULL != app_snapshot->comm_pipe_r ) {
114 free(app_snapshot->comm_pipe_r);
115 app_snapshot->comm_pipe_r = NULL;
116 }
117
118 if( NULL != app_snapshot->comm_pipe_w ) {
119 free(app_snapshot->comm_pipe_w);
120 app_snapshot->comm_pipe_w = NULL;
121 }
122
123 app_snapshot->comm_pipe_r_fd = -1;
124 app_snapshot->comm_pipe_w_fd = -1;
125
126 app_snapshot->is_eh_active = false;
127 app_snapshot->unique_pipe_id = 0;
128
129 app_snapshot->process_pid = 0;
130
131 app_snapshot->migrating = false;
132
133 app_snapshot->finished = false;
134 }
135
136
137
138
139 int orte_snapc_full_component_query(mca_base_module_t **module, int *priority)
140 {
141 opal_output_verbose(10, mca_snapc_full_component.super.output_handle,
142 "snapc:full: component_query()");
143
144 *priority = mca_snapc_full_component.super.priority;
145 *module = (mca_base_module_t *)&loc_module;
146
147 return ORTE_SUCCESS;
148 }
149
150 int orte_snapc_full_module_init(bool seed, bool app)
151 {
152 int ret, exit_status = ORTE_SUCCESS;
153
154 opal_output_verbose(10, mca_snapc_full_component.super.output_handle,
155 "snapc:full: module_init(%d, %d)", seed, app);
156
157
158
159
160 if(seed) {
161 opal_output_verbose(5, mca_snapc_full_component.super.output_handle,
162 "snapc:full: module_init: Global Snapshot Coordinator");
163
164 orte_snapc_coord_type |= ORTE_SNAPC_GLOBAL_COORD_TYPE;
165
166 if( ORTE_SUCCESS != (ret = global_coord_init()) ) {
167 exit_status = ret;
168 goto cleanup;
169 }
170 }
171
172
173
174 else if(!seed && !app) {
175
176
177
178
179
180
181 opal_output_verbose(5, mca_snapc_full_component.super.output_handle,
182 "snapc:full: module_init: Local Snapshot Coordinator");
183
184 orte_snapc_coord_type |= ORTE_SNAPC_LOCAL_COORD_TYPE;
185
186 if( ORTE_SUCCESS != (ret = local_coord_init()) ) {
187 exit_status = ret;
188 goto cleanup;
189 }
190 }
191
192
193
194 else if(app) {
195
196
197
198 if(!orte_initialized) {
199 goto cleanup;
200 }
201
202 opal_output_verbose(5, mca_snapc_full_component.super.output_handle,
203 "snapc:full: module_init: Application Snapshot Coordinator");
204
205 orte_snapc_coord_type |= ORTE_SNAPC_APP_COORD_TYPE;
206
207 if( ORTE_SUCCESS != (ret = app_coord_init()) ) {
208 exit_status = ret;
209 goto cleanup;
210 }
211 }
212 else {
213
214
215
216 opal_output_verbose(5, mca_snapc_full_component.super.output_handle,
217 "snapc:full: module_init: Unknown Snapshot Coordinator");
218
219 orte_snapc_coord_type = ORTE_SNAPC_UNASSIGN_TYPE;
220
221 exit_status = ORTE_ERROR;
222 goto cleanup;
223 }
224
225 cleanup:
226 return exit_status;
227 }
228
229 int orte_snapc_full_module_finalize(void)
230 {
231 opal_output_verbose(10, mca_snapc_full_component.super.output_handle,
232 "snapc:full: module_finalize()");
233
234 switch(orte_snapc_coord_type)
235 {
236 case ORTE_SNAPC_GLOBAL_COORD_TYPE:
237 global_coord_finalize();
238 break;
239 case ORTE_SNAPC_LOCAL_COORD_TYPE:
240 local_coord_finalize();
241 break;
242 case ORTE_SNAPC_APP_COORD_TYPE:
243 app_coord_finalize();
244 break;
245 default:
246 break;
247 }
248
249 orte_snapc_coord_type = ORTE_SNAPC_UNASSIGN_TYPE;
250
251 return ORTE_SUCCESS;
252 }
253
254 int orte_snapc_full_setup_job(orte_jobid_t jobid) {
255 int ret, exit_status = ORTE_SUCCESS;
256
257 if( ORTE_SNAPC_GLOBAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_GLOBAL_COORD_TYPE)) {
258 if(ORTE_SUCCESS != (ret = global_coord_setup_job(jobid) ) ) {
259 exit_status = ret;
260 }
261 }
262 else if( ORTE_SNAPC_LOCAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_LOCAL_COORD_TYPE)) {
263 if(ORTE_SUCCESS != (ret = local_coord_setup_job(jobid) ) ) {
264 exit_status = ret;
265 }
266 }
267
268 return exit_status;
269 }
270
271 int orte_snapc_full_release_job(orte_jobid_t jobid) {
272 int ret, exit_status = ORTE_SUCCESS;
273
274 if( ORTE_SNAPC_GLOBAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_GLOBAL_COORD_TYPE)) {
275 if(ORTE_SUCCESS != (ret = global_coord_release_job(jobid) ) ) {
276 exit_status = ret;
277 }
278 }
279 else if( ORTE_SNAPC_LOCAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_LOCAL_COORD_TYPE )) {
280 if(ORTE_SUCCESS != (ret = local_coord_release_job(jobid) ) ) {
281 exit_status = ret;
282 }
283 }
284
285 return exit_status;
286 }
287
288 int orte_snapc_full_ft_event(int state) {
289 switch(orte_snapc_coord_type)
290 {
291 case ORTE_SNAPC_GLOBAL_COORD_TYPE:
292 case ORTE_SNAPC_LOCAL_COORD_TYPE:
293 ;
294 break;
295 case ORTE_SNAPC_APP_COORD_TYPE:
296 return app_coord_ft_event(state);
297 break;
298 default:
299 break;
300 }
301
302 return ORTE_SUCCESS;
303 }
304
305 int orte_snapc_full_start_ckpt(orte_snapc_base_quiesce_t *datum)
306 {
307 switch(orte_snapc_coord_type)
308 {
309 case ORTE_SNAPC_GLOBAL_COORD_TYPE:
310 return global_coord_start_ckpt(datum);
311 break;
312 case ORTE_SNAPC_LOCAL_COORD_TYPE:
313 ;
314 break;
315 case ORTE_SNAPC_APP_COORD_TYPE:
316 ;
317 break;
318 default:
319 break;
320 }
321
322 return ORTE_SUCCESS;
323 }
324
325 int orte_snapc_full_end_ckpt(orte_snapc_base_quiesce_t *datum)
326 {
327 switch(orte_snapc_coord_type)
328 {
329 case ORTE_SNAPC_GLOBAL_COORD_TYPE:
330 return global_coord_end_ckpt(datum);
331 break;
332 case ORTE_SNAPC_LOCAL_COORD_TYPE:
333 ;
334 break;
335 case ORTE_SNAPC_APP_COORD_TYPE:
336 ;
337 break;
338 default:
339 break;
340 }
341
342 return ORTE_SUCCESS;
343 }
344
345 int orte_snapc_full_request_op(orte_snapc_base_request_op_t *datum)
346 {
347 switch(orte_snapc_coord_type)
348 {
349 case ORTE_SNAPC_GLOBAL_COORD_TYPE:
350 ;
351 break;
352 case ORTE_SNAPC_LOCAL_COORD_TYPE:
353 ;
354 break;
355 case ORTE_SNAPC_APP_COORD_TYPE:
356 return app_coord_request_op(datum);
357 break;
358 default:
359 break;
360 }
361
362 return ORTE_SUCCESS;
363 }
364
365
366
367