1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75 #ifndef MCA_SNAPC_H
76 #define MCA_SNAPC_H
77
78 #include "orte_config.h"
79 #include "orte/constants.h"
80 #include "orte/types.h"
81
82 #include "orte/mca/mca.h"
83 #include "opal/mca/base/base.h"
84 #include "opal/mca/crs/crs.h"
85 #include "opal/mca/crs/base/base.h"
86
87 #include "opal/class/opal_object.h"
88 #include "opal/class/opal_pointer_array.h"
89 #include "opal/util/output.h"
90
91 #include "orte/mca/sstore/sstore.h"
92
93 BEGIN_C_DECLS
94
95
96
97
98
/*
 * Checkpoint state codes.
 *
 * The values are consecutive integers starting at 0.  ORTE_SNAPC_CKPT_MAX
 * is one greater than the last state code.
 *
 * NOTE(review): the one-line descriptions below are inferred from the
 * identifier names only -- confirm against the snapc components.
 */
#define ORTE_SNAPC_CKPT_STATE_ERROR           0  /* error */
#define ORTE_SNAPC_CKPT_STATE_NONE            1  /* no checkpoint activity */
#define ORTE_SNAPC_CKPT_STATE_REQUEST         2  /* checkpoint requested */
#define ORTE_SNAPC_CKPT_STATE_PENDING         3  /* checkpoint pending */
#define ORTE_SNAPC_CKPT_STATE_RUNNING         4  /* checkpoint running */
#define ORTE_SNAPC_CKPT_STATE_INC_PREPED      5  /* INC preparation done */
#define ORTE_SNAPC_CKPT_STATE_STOPPED         6  /* stopped */
#define ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL  7  /* finished locally */
#define ORTE_SNAPC_CKPT_STATE_MIGRATING       8  /* migrating */
#define ORTE_SNAPC_CKPT_STATE_ESTABLISHED     9  /* established */
#define ORTE_SNAPC_CKPT_STATE_RECOVERED      10  /* recovered */
#define ORTE_SNAPC_CKPT_STATE_NO_CKPT        11  /* cannot checkpoint */
#define ORTE_SNAPC_CKPT_STATE_NO_RESTART     12  /* cannot restart */
#define ORTE_SNAPC_CKPT_MAX                  13  /* number of state codes */
126
127
128
129
130
/*
 * Offset added to a checkpoint state code to form its encoded value.
 * 131072 == 2^17.
 * NOTE(review): presumably chosen large enough that encoded values do not
 * collide with other ORTE state codes -- confirm against the state
 * definitions used by the callers.
 */
#define ORTE_SNAPC_CKPT_SHIFT 131072

/*
 * Encode a checkpoint state code by adding ORTE_SNAPC_CKPT_SHIFT.
 * The argument is parenthesized so that expressions containing operators
 * of lower precedence than '+' (e.g. ?:, |, &) are encoded as a whole.
 */
#define ORTE_SNAPC_CKPT_NOTIFY(state) (ORTE_SNAPC_CKPT_SHIFT + (state))

/*
 * Decode a value produced by ORTE_SNAPC_CKPT_NOTIFY() by subtracting
 * ORTE_SNAPC_CKPT_SHIFT.  The argument is parenthesized for the same
 * precedence reason as above.
 */
#define ORTE_SNAPC_CKPT_STATE(state) ((state) - ORTE_SNAPC_CKPT_SHIFT)

/*
 * Evaluate to non-zero when the given value is at or above
 * ORTE_SNAPC_CKPT_SHIFT, i.e. lies in the encoded range.
 */
#define CHECK_ORTE_SNAPC_CKPT_STATE(state) ((state) >= ORTE_SNAPC_CKPT_SHIFT)
141
142
143
144
145
146
147 struct orte_snapc_base_local_snapshot_1_0_0_t {
148
149 opal_list_item_t super;
150
151
152 orte_process_name_t process_name;
153
154
155 int state;
156
157
158 orte_sstore_base_handle_t ss_handle;
159 };
160 typedef struct orte_snapc_base_local_snapshot_1_0_0_t orte_snapc_base_local_snapshot_1_0_0_t;
161 typedef struct orte_snapc_base_local_snapshot_1_0_0_t orte_snapc_base_local_snapshot_t;
162
163 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_local_snapshot_t);
164
165
166
167
168
169
170 struct orte_snapc_base_global_snapshot_1_0_0_t {
171
172 opal_list_item_t super;
173
174
175 opal_list_t local_snapshots;
176
177
178 opal_crs_base_ckpt_options_t *options;
179
180
181 orte_sstore_base_handle_t ss_handle;
182 };
183 typedef struct orte_snapc_base_global_snapshot_1_0_0_t orte_snapc_base_global_snapshot_1_0_0_t;
184 typedef struct orte_snapc_base_global_snapshot_1_0_0_t orte_snapc_base_global_snapshot_t;
185
186 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_global_snapshot_t);
187
188 struct orte_snapc_base_quiesce_1_0_0_t {
189
190 opal_object_t super;
191
192
193 int epoch;
194
195 char * crs_name;
196
197 char * handle;
198
199 orte_snapc_base_global_snapshot_t *snapshot;
200
201
202 orte_sstore_base_handle_t ss_handle;
203
204 orte_sstore_base_global_snapshot_info_t *ss_snapshot;
205
206
207 char * target_dir;
208
209 char * cmdline;
210
211 opal_crs_state_type_t cr_state;
212
213 bool checkpointing;
214
215 bool restarting;
216
217
218 bool migrating;
219
220 int num_migrating;
221 opal_pointer_array_t migrating_procs;
222 };
223 typedef struct orte_snapc_base_quiesce_1_0_0_t orte_snapc_base_quiesce_1_0_0_t;
224 typedef struct orte_snapc_base_quiesce_1_0_0_t orte_snapc_base_quiesce_t;
225
226 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_quiesce_t);
227
228
229
230
/*
 * Event codes carried in the `event` field of orte_snapc_base_request_op_t.
 * Values are spelled out explicitly; they are consecutive starting at 0.
 */
typedef enum {
    ORTE_SNAPC_OP_NONE               = 0,
    ORTE_SNAPC_OP_INIT               = 1,
    ORTE_SNAPC_OP_FIN                = 2,
    ORTE_SNAPC_OP_FIN_ACK            = 3,
    ORTE_SNAPC_OP_CHECKPOINT         = 4,
    ORTE_SNAPC_OP_RESTART            = 5,
    ORTE_SNAPC_OP_MIGRATE            = 6,
    ORTE_SNAPC_OP_QUIESCE_START      = 7,
    ORTE_SNAPC_OP_QUIESCE_CHECKPOINT = 8,
    ORTE_SNAPC_OP_QUIESCE_END        = 9
} orte_snapc_base_request_op_event_t;
243
244 struct orte_snapc_base_request_op_1_0_0_t {
245
246 opal_object_t super;
247
248
249 orte_snapc_base_request_op_event_t event;
250
251
252 bool is_active;
253
254
255 int leader;
256
257
258 int seq_num;
259
260
261 char * global_handle;
262
263
264 orte_sstore_base_handle_t ss_handle;
265
266
267 int mig_num;
268 int *mig_vpids;
269
270
271 char (*mig_host_pref)[OPAL_MAX_PROCESSOR_NAME];
272
273
274 int *mig_vpid_pref;
275
276
277 int *mig_off_node;
278 };
279 typedef struct orte_snapc_base_request_op_1_0_0_t orte_snapc_base_request_op_1_0_0_t;
280 typedef struct orte_snapc_base_request_op_1_0_0_t orte_snapc_base_request_op_t;
281
282 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_request_op_t);
283
284
285
286
287
/*
 * Signature of the module initialization entry point (stored in the
 * snapc_init slot of the module structure).
 *
 * Takes two boolean flags, `seed` and `app`, and returns an int.
 * NOTE(review): presumably the flags identify the kind of calling process
 * and the return is an ORTE status code -- confirm with a component.
 */
typedef int (*orte_snapc_base_module_init_fn_t)(bool seed, bool app);
290
291
292
293
294
/*
 * Signature of the module finalization entry point (stored in the
 * snapc_finalize slot of the module structure).
 *
 * Takes no arguments and returns an int.
 * NOTE(review): presumably an ORTE status code -- confirm.
 */
typedef int (*orte_snapc_base_module_finalize_fn_t)(void);
297
298
299
300
301
302 typedef int (*orte_snapc_base_setup_job_fn_t)
303 (orte_jobid_t jobid);
304
305
306
307
308
309 typedef int (*orte_snapc_base_release_job_fn_t)
310 (orte_jobid_t jobid);
311
312
313
314
315
316
317
318
319
320
/*
 * Signature of the ft_event entry point of the module structure.
 *
 * Takes an integer `state` and returns an int.
 * NOTE(review): `state` is presumably a CRS/fault-tolerance event state
 * and the return an ORTE status code -- confirm with a component.
 */
typedef int (*orte_snapc_base_ft_event_fn_t)(int state);
322
323
324
325
326
327
328
329
330
331
332
333 typedef int (*orte_snapc_base_start_checkpoint_fn_t)
334 (orte_snapc_base_quiesce_t *datum);
335
336
337
338
339
340
341
342 typedef int (*orte_snapc_base_end_checkpoint_fn_t)
343 (orte_snapc_base_quiesce_t *datum);
344
345
346
347
348 typedef int (*orte_snapc_base_request_op_fn_t)
349 (orte_snapc_base_request_op_t *datum);
350
351
352
353
354 struct orte_snapc_base_component_2_0_0_t {
355
356 mca_base_component_t base_version;
357
358 mca_base_component_data_t base_data;
359
360
361 int verbose;
362
363 int output_handle;
364
365 int priority;
366 };
367 typedef struct orte_snapc_base_component_2_0_0_t orte_snapc_base_component_2_0_0_t;
368 typedef struct orte_snapc_base_component_2_0_0_t orte_snapc_base_component_t;
369
370
371
372
373 struct orte_snapc_base_module_1_0_0_t {
374
375 orte_snapc_base_module_init_fn_t snapc_init;
376
377 orte_snapc_base_module_finalize_fn_t snapc_finalize;
378
379 orte_snapc_base_setup_job_fn_t setup_job;
380
381 orte_snapc_base_release_job_fn_t release_job;
382
383 orte_snapc_base_ft_event_fn_t ft_event;
384
385 orte_snapc_base_start_checkpoint_fn_t start_ckpt;
386 orte_snapc_base_end_checkpoint_fn_t end_ckpt;
387
388 orte_snapc_base_request_op_fn_t request_op;
389 };
390 typedef struct orte_snapc_base_module_1_0_0_t orte_snapc_base_module_1_0_0_t;
391 typedef struct orte_snapc_base_module_1_0_0_t orte_snapc_base_module_t;
392
/*
 * Global module and component instances, declared here and defined in
 * another translation unit.
 * NOTE(review): presumably filled in by the framework's selection logic
 * with the chosen component's module -- confirm in the snapc base code.
 */
ORTE_DECLSPEC extern orte_snapc_base_module_t orte_snapc;
ORTE_DECLSPEC extern orte_snapc_base_component_t orte_snapc_base_selected_component;
395
396
397
398
/*
 * Version initializer for snapc components: expands to
 * ORTE_MCA_BASE_VERSION_2_1_0 with the framework name "snapc" and the
 * numbers 2, 0, 0.
 * NOTE(review): the three numbers are presumably the framework interface
 * major/minor/release version -- confirm against the macro's definition.
 */
#define ORTE_SNAPC_BASE_VERSION_2_0_0 ORTE_MCA_BASE_VERSION_2_1_0("snapc", 2, 0, 0)
401
402 END_C_DECLS
403
404 #endif
405