1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 #include "opal_config.h"
28 #include "opal/mca/crs/crs.h"
29 #include "opal/mca/event/event.h"
30 #include "opal/util/output.h"
31 #include "opal/prefetch.h"
32
33 #ifndef OPAL_CR_H
34 #define OPAL_CR_H
35
36
37 BEGIN_C_DECLS
38
39
40
41
/*
 * Single-byte command codes used by the checkpoint/restart (CR) code.
 * Each value is explicitly typed as char.
 * NOTE(review): the channel these bytes travel over is not visible in this
 * header -- confirm against the implementation.
 */
#define OPAL_CR_DONE          ((char) 0)
#define OPAL_CR_ACK           ((char) 1)
#define OPAL_CR_CHECKPOINT    ((char) 2)

/*
 * Well-known base names.  The literals are parenthesized, so they cannot be
 * concatenated with an adjacent string literal at the point of use.
 * NOTE(review): presumably the first two name the read/write ends of a
 * named pipe and the third an environment file -- confirm with the users.
 */
#define OPAL_CR_NAMED_PROG_R  ("opal_cr_prog_read")
#define OPAL_CR_NAMED_PROG_W  ("opal_cr_prog_write")
#define OPAL_CR_BASE_ENV_NAME ("opal_cr_restart-env")
48
49
50
51
/**
 * State values used by the checkpoint/restart code.
 *
 * A single enumeration carries two families of constants:
 * OPAL_CHECKPOINT_CMD_* and OPAL_CR_STATUS_*.  The values are consecutive
 * integers starting at 0 in declaration order; they are written out
 * explicitly so that inserting or reordering an enumerator is a visible
 * change.
 *
 * NOTE(review): the grouping comments below are inferred from the
 * enumerator names -- confirm against the code that produces them.
 */
typedef enum opal_cr_ckpt_cmd_state_t {
    /* Checkpoint command outcome */
    OPAL_CHECKPOINT_CMD_START       = 0,
    OPAL_CHECKPOINT_CMD_IN_PROGRESS = 1,
    OPAL_CHECKPOINT_CMD_NULL        = 2,
    OPAL_CHECKPOINT_CMD_ERROR       = 3,

    /* Checkpoint status */
    OPAL_CR_STATUS_NONE             = 4,
    OPAL_CR_STATUS_REQUESTED        = 5,
    OPAL_CR_STATUS_RUNNING          = 6,
    OPAL_CR_STATUS_TERM             = 7,

    /* Continue status */
    OPAL_CR_STATUS_CONTINUE         = 8,

    /* Restart status */
    OPAL_CR_STATUS_RESTART_PRE      = 9,
    OPAL_CR_STATUS_RESTART_POST     = 10
} opal_cr_ckpt_cmd_state_t;
69
70
71
72 OPAL_DECLSPEC extern int opal_cr_output;
73
74
75
76 OPAL_DECLSPEC extern char * opal_cr_pipe_dir;
77
78
79
80 OPAL_DECLSPEC extern int opal_cr_entry_point_signal;
81
82
83 OPAL_DECLSPEC extern bool opal_cr_is_enabled;
84
85
86
87 OPAL_DECLSPEC extern bool opal_cr_is_tool;
88
89
90 OPAL_DECLSPEC extern int opal_cr_checkpoint_request;
91
92
93 OPAL_DECLSPEC extern int opal_cr_checkpointing_state;
94
95
96
97
98
99 OPAL_DECLSPEC extern bool opal_cr_continue_like_restart;
100
101 #if OPAL_ENABLE_CRDEBUG == 1
102
103 OPAL_DECLSPEC extern int MPIR_debug_with_checkpoint;
104
105
106
107
108 OPAL_DECLSPEC int opal_cr_debug_set_current_ckpt_thread_self(void);
109 OPAL_DECLSPEC int opal_cr_debug_clear_current_ckpt_thread(void);
110
111
112
113
114
115 OPAL_DECLSPEC int MPIR_checkpoint_debugger_detach(void);
116
117
118
119
120
121 OPAL_DECLSPEC void *MPIR_checkpoint_debugger_breakpoint(void);
122
123
124
125
126 OPAL_DECLSPEC void *MPIR_checkpoint_debugger_waitpoint(void);
127
128
129
130
131 OPAL_DECLSPEC void MPIR_checkpoint_debugger_signal_handler(int signo);
132 #endif
133
134
135
136
137 OPAL_DECLSPEC int opal_cr_refresh_environ(int prev_pid);
138
139
140
141
142
143
144
145 OPAL_DECLSPEC int opal_cr_set_enabled(bool);
146
147
148
149
150
151 OPAL_DECLSPEC int opal_cr_init(void);
152
153
154
155
156
157 OPAL_DECLSPEC int opal_cr_finalize(void);
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176 OPAL_DECLSPEC void opal_cr_test_if_checkpoint_ready(void);
177
178
179
180
181 OPAL_DECLSPEC extern bool opal_cr_stall_check;
182 OPAL_DECLSPEC extern bool opal_cr_currently_stalled;
183
184 #if OPAL_ENABLE_FT_THREAD == 1
185
186 OPAL_DECLSPEC void opal_cr_thread_init_library(void);
187 OPAL_DECLSPEC void opal_cr_thread_finalize_library(void);
188 OPAL_DECLSPEC void opal_cr_thread_abort_library(void);
189 OPAL_DECLSPEC void opal_cr_thread_enter_library(void);
190 OPAL_DECLSPEC void opal_cr_thread_exit_library(void);
191 OPAL_DECLSPEC void opal_cr_thread_noop_progress(void);
192 #endif
193
194
195
196
197 #if OPAL_ENABLE_FT == 0 || OPAL_ENABLE_FT_CR == 0
/*
 * No-op versions of the checkpoint/restart hook macros, selected by the
 * enclosing #if when fault tolerance or checkpoint/restart is compiled out.
 *
 * Each macro expands to one empty do { } while (0) statement rather than a
 * bare ';'.  The invocation therefore has to be terminated with ';' by the
 * caller, and "if (c) OPAL_CR_ENTER_LIBRARY(); else ..." compiles (a bare
 * ';' expansion left a second empty statement in front of the else).
 */
#define OPAL_CR_TEST_CHECKPOINT_READY()       do { } while (0)
#define OPAL_CR_TEST_CHECKPOINT_READY_STALL() do { } while (0)
#define OPAL_CR_INIT_LIBRARY()                do { } while (0)
#define OPAL_CR_FINALIZE_LIBRARY()            do { } while (0)
#define OPAL_CR_ABORT_LIBRARY()               do { } while (0)
#define OPAL_CR_ENTER_LIBRARY()               do { } while (0)
#define OPAL_CR_EXIT_LIBRARY()                do { } while (0)
#define OPAL_CR_NOOP_PROGRESS()               do { } while (0)
206 #endif
207
208
209
210
211 #if OPAL_ENABLE_FT_CR == 1
/**
 * Call opal_cr_test_if_checkpoint_ready() when opal_cr_is_enabled is set.
 * The test is wrapped in OPAL_UNLIKELY, i.e. the disabled case is treated
 * as the common path.
 *
 * Expands to a single do { } while (0) statement: terminate the invocation
 * with ';'.  This makes it usable as the body of an unbraced if/else, which
 * a bare { } block followed by ';' is not.
 */
#define OPAL_CR_TEST_CHECKPOINT_READY()              \
    do {                                             \
        if (OPAL_UNLIKELY(opal_cr_is_enabled)) {     \
            opal_cr_test_if_checkpoint_ready();      \
        }                                            \
    } while (0)
218
/**
 * Call opal_cr_test_if_checkpoint_ready() when opal_cr_is_enabled is set
 * and opal_cr_stall_check is clear.  The combined test is wrapped in
 * OPAL_UNLIKELY.
 *
 * Expands to a single do { } while (0) statement: terminate the invocation
 * with ';'.  This makes it usable as the body of an unbraced if/else, which
 * a bare { } block followed by ';' is not.
 */
#define OPAL_CR_TEST_CHECKPOINT_READY_STALL()                              \
    do {                                                                   \
        if (OPAL_UNLIKELY(opal_cr_is_enabled && !opal_cr_stall_check)) {   \
            opal_cr_test_if_checkpoint_ready();                            \
        }                                                                  \
    } while (0)
225
226
227 #if OPAL_ENABLE_FT_THREAD == 0
/*
 * Library hooks for the non-threaded build (OPAL_ENABLE_FT_THREAD == 0):
 * every hook is an alias for OPAL_CR_TEST_CHECKPOINT_READY().
 *
 * The expansions carry no trailing ';' -- the caller's own ';' terminates
 * the statement.  A ';' inside the macro produced an extra empty statement
 * at every call site and made "if (c) OPAL_CR_ENTER_LIBRARY(); else ..."
 * impossible to compile.
 */
#define OPAL_CR_INIT_LIBRARY()     OPAL_CR_TEST_CHECKPOINT_READY()
#define OPAL_CR_FINALIZE_LIBRARY() OPAL_CR_TEST_CHECKPOINT_READY()
#define OPAL_CR_ABORT_LIBRARY()    OPAL_CR_TEST_CHECKPOINT_READY()
#define OPAL_CR_ENTER_LIBRARY()    OPAL_CR_TEST_CHECKPOINT_READY()
#define OPAL_CR_EXIT_LIBRARY()     OPAL_CR_TEST_CHECKPOINT_READY()
#define OPAL_CR_NOOP_PROGRESS()    OPAL_CR_TEST_CHECKPOINT_READY()
234 #endif
235
236
237 #if OPAL_ENABLE_FT_THREAD == 1
/*
 * Library hooks for the threaded build (OPAL_ENABLE_FT_THREAD == 1): each
 * hook forwards to the opal_cr_thread_*() function of the same name.
 *
 * Every macro expands to a single do { } while (0) statement: terminate the
 * invocation with ';'.  This makes the hooks usable as the body of an
 * unbraced if/else, which a bare { } block followed by ';' is not.
 */
#define OPAL_CR_INIT_LIBRARY()                 \
    do {                                       \
        opal_cr_thread_init_library();         \
    } while (0)

#define OPAL_CR_FINALIZE_LIBRARY()             \
    do {                                       \
        opal_cr_thread_finalize_library();     \
    } while (0)

#define OPAL_CR_ABORT_LIBRARY()                \
    do {                                       \
        opal_cr_thread_abort_library();        \
    } while (0)

#define OPAL_CR_ENTER_LIBRARY()                \
    do {                                       \
        opal_cr_thread_enter_library();        \
    } while (0)

#define OPAL_CR_EXIT_LIBRARY()                 \
    do {                                       \
        opal_cr_thread_exit_library();         \
    } while (0)

#define OPAL_CR_NOOP_PROGRESS()                \
    do {                                       \
        opal_cr_thread_noop_progress();        \
    } while (0)
262 #endif
263
264 #endif
265
266
267
268
269
270
271
272
273
274
275
276
277 typedef int (*opal_cr_notify_callback_fn_t) (opal_cr_ckpt_cmd_state_t);
278
279 OPAL_DECLSPEC int opal_cr_reg_notify_callback
280 (opal_cr_notify_callback_fn_t new_func,
281 opal_cr_notify_callback_fn_t *prev_func);
282
283
284
285
286
287
288
289 OPAL_DECLSPEC int opal_cr_inc_core(pid_t pid,
290 opal_crs_base_snapshot_t *snapshot,
291 opal_crs_base_ckpt_options_t *options,
292 int *state);
293
294 OPAL_DECLSPEC int opal_cr_inc_core_prep(void);
295 OPAL_DECLSPEC int opal_cr_inc_core_ckpt(pid_t pid,
296 opal_crs_base_snapshot_t *snapshot,
297 opal_crs_base_ckpt_options_t *options,
298 int *state);
299 OPAL_DECLSPEC int opal_cr_inc_core_recover(int state);
300
301
302
303
304
/**
 * Events for which a user INC callback can be registered (see
 * opal_cr_user_inc_register_callback()).
 *
 * Real events are numbered 0..5.  OPAL_CR_INC_MAX is one past the last
 * event, so it equals the number of events and is not an event itself.
 *
 * NOTE(review): the names suggest pre/post positions relative to the CRS
 * checkpoint and to MPI-level handling -- confirm the exact call points in
 * the implementation.
 */
typedef enum {
    /* before the CRS step */
    OPAL_CR_INC_PRE_CRS_PRE_MPI   = 0,
    OPAL_CR_INC_PRE_CRS_POST_MPI  = 1,

    /* around the checkpoint itself */
    OPAL_CR_INC_CRS_PRE_CKPT      = 2,
    OPAL_CR_INC_CRS_POST_CKPT     = 3,

    /* after the CRS step */
    OPAL_CR_INC_POST_CRS_PRE_MPI  = 4,
    OPAL_CR_INC_POST_CRS_POST_MPI = 5,

    /* number of events above */
    OPAL_CR_INC_MAX               = 6
} opal_cr_user_inc_callback_event_t;
314
/**
 * State value handed to a user INC callback together with the event.
 *
 * NOTE(review): judging by the names, PREPARE precedes the checkpoint and
 * CONTINUE / RESTART / ERROR describe how the process resumes -- confirm.
 */
typedef enum {
    OPAL_CR_INC_STATE_PREPARE  = 0,
    OPAL_CR_INC_STATE_CONTINUE = 1,
    OPAL_CR_INC_STATE_RESTART  = 2,
    OPAL_CR_INC_STATE_ERROR    = 3
} opal_cr_user_inc_callback_state_t;
321
322
323
324
325 typedef int (*opal_cr_user_inc_callback_fn_t)(opal_cr_user_inc_callback_event_t event,
326 opal_cr_user_inc_callback_state_t state);
327
328 OPAL_DECLSPEC int opal_cr_user_inc_register_callback
329 (opal_cr_user_inc_callback_event_t event,
330 opal_cr_user_inc_callback_fn_t function,
331 opal_cr_user_inc_callback_fn_t *prev_function);
332
333 OPAL_DECLSPEC int ompi_trigger_user_inc_callback(opal_cr_user_inc_callback_event_t event,
334 opal_cr_user_inc_callback_state_t state);
335
336
337
338
339
340
341
342
343 typedef int (*opal_cr_coord_callback_fn_t) (int);
344
345
346
347
348
349 OPAL_DECLSPEC int opal_cr_reg_coord_callback
350 (opal_cr_coord_callback_fn_t new_func,
351 opal_cr_coord_callback_fn_t *prev_func);
352
353
354
355
356 OPAL_DECLSPEC int opal_cr_coord(int state);
357
358
359
360
361 OPAL_DECLSPEC void opal_cr_set_time(int idx);
362 OPAL_DECLSPEC void opal_cr_display_all_timers(void);
363 OPAL_DECLSPEC void opal_cr_clear_timers(void);
364
365 OPAL_DECLSPEC extern bool opal_cr_timing_enabled;
366 OPAL_DECLSPEC extern bool opal_cr_timing_barrier_enabled;
367 OPAL_DECLSPEC extern int opal_cr_timing_my_rank;
368 OPAL_DECLSPEC extern int opal_cr_timing_target_rank;
369
370
/*
 * Timer slot indices, numbered consecutively 0..20.  OPAL_CR_TIMER_MAX is
 * one past the last slot and therefore equals the number of slots.
 * NOTE(review): presumably these are the idx values passed to
 * opal_cr_set_time() / OPAL_CR_SET_TIMER() -- confirm.
 */
#define OPAL_CR_TIMER_ENTRY0    0
#define OPAL_CR_TIMER_ENTRY1    1
#define OPAL_CR_TIMER_ENTRY2    2

#define OPAL_CR_TIMER_CRCPBR0   3
#define OPAL_CR_TIMER_CRCP0     4
#define OPAL_CR_TIMER_CRCPBR1   5

#define OPAL_CR_TIMER_P2P0      6
#define OPAL_CR_TIMER_P2P1      7
#define OPAL_CR_TIMER_P2PBR0    8

#define OPAL_CR_TIMER_CORE0     9
#define OPAL_CR_TIMER_CORE1     10
#define OPAL_CR_TIMER_COREBR0   11

#define OPAL_CR_TIMER_P2P2      12
#define OPAL_CR_TIMER_P2PBR1    13
#define OPAL_CR_TIMER_P2P3      14
#define OPAL_CR_TIMER_P2PBR2    15

#define OPAL_CR_TIMER_CRCP1     16
#define OPAL_CR_TIMER_COREBR1   17
#define OPAL_CR_TIMER_CORE2     18

#define OPAL_CR_TIMER_ENTRY3    19
#define OPAL_CR_TIMER_ENTRY4    20

/* Number of timer slots defined above. */
#define OPAL_CR_TIMER_MAX       21
393
394
/**
 * Call opal_cr_clear_timers() when opal_cr_timing_enabled is set.
 *
 * The flag is a bool, so it is tested directly instead of being compared
 * with 0.  Expands to a single do { } while (0) statement: terminate the
 * invocation with ';'.  This makes the macro usable as the body of an
 * unbraced if/else, which a bare { } block followed by ';' is not.
 */
#define OPAL_CR_CLEAR_TIMERS()                           \
    do {                                                 \
        if (OPAL_UNLIKELY(opal_cr_timing_enabled)) {     \
            opal_cr_clear_timers();                      \
        }                                                \
    } while (0)
401
/**
 * Call opal_cr_set_time(idx) when opal_cr_timing_enabled is set.
 *
 * @param idx  timer slot, forwarded unchanged; it is evaluated at most once,
 *             and not at all while timing is disabled.
 *
 * The flag is a bool, so it is tested directly instead of being compared
 * with 0.  Expands to a single do { } while (0) statement: terminate the
 * invocation with ';'.  This makes the macro usable as the body of an
 * unbraced if/else, which a bare { } block followed by ';' is not.
 */
#define OPAL_CR_SET_TIMER(idx)                           \
    do {                                                 \
        if (OPAL_UNLIKELY(opal_cr_timing_enabled)) {     \
            opal_cr_set_time(idx);                       \
        }                                                \
    } while (0)
408
/**
 * Call opal_cr_display_all_timers() when opal_cr_timing_enabled is set.
 *
 * The flag is a bool, so it is tested directly instead of being compared
 * with 0.  Expands to a single do { } while (0) statement: terminate the
 * invocation with ';'.  This makes the macro usable as the body of an
 * unbraced if/else, which a bare { } block followed by ';' is not.
 */
#define OPAL_CR_DISPLAY_ALL_TIMERS()                     \
    do {                                                 \
        if (OPAL_UNLIKELY(opal_cr_timing_enabled)) {     \
            opal_cr_display_all_timers();                \
        }                                                \
    } while (0)
415
416 END_C_DECLS
417
418 #endif
419