1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 
  26 
  27 #include "opal_config.h"
  28 #include "opal/mca/crs/crs.h"
  29 #include "opal/mca/event/event.h"
  30 #include "opal/util/output.h"
  31 #include "opal/prefetch.h"
  32 
  33 #ifndef OPAL_CR_H
  34 #define OPAL_CR_H
  35 
  36 
  37 BEGIN_C_DECLS
  38 
  39 
  40 
  41 
  42 #define OPAL_CR_DONE       ((char) 0)
  43 #define OPAL_CR_ACK        ((char) 1)
  44 #define OPAL_CR_CHECKPOINT ((char) 2)
  45 #define OPAL_CR_NAMED_PROG_R  ("opal_cr_prog_read")
  46 #define OPAL_CR_NAMED_PROG_W  ("opal_cr_prog_write")
  47 #define OPAL_CR_BASE_ENV_NAME ("opal_cr_restart-env")
  48 
  49 
  50 
  51 
  52 enum opal_cr_ckpt_cmd_state_t {
  53     OPAL_CHECKPOINT_CMD_START,       
  54     OPAL_CHECKPOINT_CMD_IN_PROGRESS, 
  55     OPAL_CHECKPOINT_CMD_NULL,        
  56     OPAL_CHECKPOINT_CMD_ERROR,       
  57     
  58     OPAL_CR_STATUS_NONE,       
  59     OPAL_CR_STATUS_REQUESTED,  
  60     OPAL_CR_STATUS_RUNNING,    
  61     OPAL_CR_STATUS_TERM,       
  62     
  63     OPAL_CR_STATUS_CONTINUE,
  64     
  65     OPAL_CR_STATUS_RESTART_PRE,
  66     OPAL_CR_STATUS_RESTART_POST
  67 };
  68 typedef enum opal_cr_ckpt_cmd_state_t opal_cr_ckpt_cmd_state_t;
  69 
  70     
  71 
  72     OPAL_DECLSPEC extern int    opal_cr_output;
  73 
  74     
  75 
  76     OPAL_DECLSPEC extern char * opal_cr_pipe_dir;
  77 
  78     
  79 
  80     OPAL_DECLSPEC extern int    opal_cr_entry_point_signal;
  81 
  82     
  83     OPAL_DECLSPEC extern bool   opal_cr_is_enabled;
  84 
  85     
  86 
  87     OPAL_DECLSPEC extern bool   opal_cr_is_tool;
  88 
  89     
  90     OPAL_DECLSPEC extern int opal_cr_checkpoint_request;
  91 
  92     
  93     OPAL_DECLSPEC extern int opal_cr_checkpointing_state;
  94 
  95     
  96 
  97 
  98 
  99     OPAL_DECLSPEC extern bool opal_cr_continue_like_restart;
 100 
 101 #if OPAL_ENABLE_CRDEBUG == 1
 102     
 103     OPAL_DECLSPEC extern int MPIR_debug_with_checkpoint;
 104 
 105     
 106 
 107 
 108     OPAL_DECLSPEC int opal_cr_debug_set_current_ckpt_thread_self(void);
 109     OPAL_DECLSPEC int opal_cr_debug_clear_current_ckpt_thread(void);
 110 
 111     
 112 
 113 
 114 
 115     OPAL_DECLSPEC int MPIR_checkpoint_debugger_detach(void);
 116 
 117     
 118 
 119 
 120 
 121     OPAL_DECLSPEC void *MPIR_checkpoint_debugger_breakpoint(void);
 122 
 123     
 124 
 125 
 126     OPAL_DECLSPEC void *MPIR_checkpoint_debugger_waitpoint(void);
 127 
 128     
 129 
 130 
 131     OPAL_DECLSPEC void MPIR_checkpoint_debugger_signal_handler(int signo);
 132 #endif
 133 
 134     
 135 
 136 
 137     OPAL_DECLSPEC int opal_cr_refresh_environ(int prev_pid);
 138 
 139     
 140 
 141 
 142 
 143 
 144 
 145     OPAL_DECLSPEC int opal_cr_set_enabled(bool);
 146 
 147     
 148 
 149 
 150 
 151     OPAL_DECLSPEC int opal_cr_init(void);
 152 
 153     
 154 
 155 
 156 
 157     OPAL_DECLSPEC int opal_cr_finalize(void);
 158 
 159     
 160 
 161 
 162 
 163 
 164 
 165 
 166 
 167 
 168 
 169 
 170 
 171 
 172 
 173 
 174 
 175 
 176     OPAL_DECLSPEC void opal_cr_test_if_checkpoint_ready(void);
 177 
 178     
 179 
 180 
 181     OPAL_DECLSPEC extern bool opal_cr_stall_check;
 182     OPAL_DECLSPEC extern bool opal_cr_currently_stalled;
 183 
 184 #if OPAL_ENABLE_FT_THREAD == 1
 185     
 186     OPAL_DECLSPEC void opal_cr_thread_init_library(void);
 187     OPAL_DECLSPEC void opal_cr_thread_finalize_library(void);
 188     OPAL_DECLSPEC void opal_cr_thread_abort_library(void);
 189     OPAL_DECLSPEC void opal_cr_thread_enter_library(void);
 190     OPAL_DECLSPEC void opal_cr_thread_exit_library(void);
 191     OPAL_DECLSPEC void opal_cr_thread_noop_progress(void);
 192 #endif 
 193 
 194     
 195 
 196 
 197 #if OPAL_ENABLE_FT == 0 || OPAL_ENABLE_FT_CR == 0
 198 #define OPAL_CR_TEST_CHECKPOINT_READY() ;
 199 #define OPAL_CR_TEST_CHECKPOINT_READY_STALL() ;
 200 #define OPAL_CR_INIT_LIBRARY() ;
 201 #define OPAL_CR_FINALIZE_LIBRARY() ;
 202 #define OPAL_CR_ABORT_LIBRARY() ;
 203 #define OPAL_CR_ENTER_LIBRARY() ;
 204 #define OPAL_CR_EXIT_LIBRARY() ;
 205 #define OPAL_CR_NOOP_PROGRESS() ;
 206 #endif 
 207 
 208     
 209 
 210 
 211 #if OPAL_ENABLE_FT_CR == 1
 212 #define OPAL_CR_TEST_CHECKPOINT_READY()      \
 213   {                                          \
 214     if(OPAL_UNLIKELY(opal_cr_is_enabled) ) { \
 215       opal_cr_test_if_checkpoint_ready();    \
 216     }                                        \
 217   }
 218 
 219 #define OPAL_CR_TEST_CHECKPOINT_READY_STALL()        \
 220   {                                                  \
 221     if(OPAL_UNLIKELY(opal_cr_is_enabled && !opal_cr_stall_check)) { \
 222       opal_cr_test_if_checkpoint_ready();            \
 223     }                                                \
 224   }
 225 
 226 
 227 #if OPAL_ENABLE_FT_THREAD == 0
 228 #define OPAL_CR_INIT_LIBRARY()     OPAL_CR_TEST_CHECKPOINT_READY();
 229 #define OPAL_CR_FINALIZE_LIBRARY() OPAL_CR_TEST_CHECKPOINT_READY();
 230 #define OPAL_CR_ABORT_LIBRARY()    OPAL_CR_TEST_CHECKPOINT_READY();
 231 #define OPAL_CR_ENTER_LIBRARY()    OPAL_CR_TEST_CHECKPOINT_READY();
 232 #define OPAL_CR_EXIT_LIBRARY()     OPAL_CR_TEST_CHECKPOINT_READY();
 233 #define OPAL_CR_NOOP_PROGRESS()    OPAL_CR_TEST_CHECKPOINT_READY();
 234 #endif 
 235 
 236 
 237 #if OPAL_ENABLE_FT_THREAD == 1
 238 #define OPAL_CR_INIT_LIBRARY()    \
 239  {                                \
 240    opal_cr_thread_init_library(); \
 241  }
 242 #define OPAL_CR_FINALIZE_LIBRARY()    \
 243  {                                    \
 244    opal_cr_thread_finalize_library(); \
 245  }
 246 #define OPAL_CR_ABORT_LIBRARY()    \
 247  {                                 \
 248    opal_cr_thread_abort_library(); \
 249  }
 250 #define OPAL_CR_ENTER_LIBRARY()    \
 251  {                                 \
 252    opal_cr_thread_enter_library(); \
 253  }
 254 #define OPAL_CR_EXIT_LIBRARY()    \
 255  {                                \
 256    opal_cr_thread_exit_library(); \
 257  }
 258 #define OPAL_CR_NOOP_PROGRESS()    \
 259  {                                 \
 260    opal_cr_thread_noop_progress(); \
 261  }
 262 #endif 
 263 
 264 #endif 
 265 
 266     
 267 
 268 
 269     
 270 
 271 
 272     
 273 
 274 
 275 
 276 
 277     typedef int (*opal_cr_notify_callback_fn_t) (opal_cr_ckpt_cmd_state_t);
 278 
 279     OPAL_DECLSPEC int opal_cr_reg_notify_callback
 280     (opal_cr_notify_callback_fn_t new_func,
 281      opal_cr_notify_callback_fn_t *prev_func);
 282 
 283     
 284 
 285 
 286 
 287 
 288 
 289     OPAL_DECLSPEC int opal_cr_inc_core(pid_t pid,
 290                                        opal_crs_base_snapshot_t *snapshot,
 291                                        opal_crs_base_ckpt_options_t *options,
 292                                        int *state);
 293 
 294     OPAL_DECLSPEC int opal_cr_inc_core_prep(void);
 295     OPAL_DECLSPEC int opal_cr_inc_core_ckpt(pid_t pid,
 296                                             opal_crs_base_snapshot_t *snapshot,
 297                                             opal_crs_base_ckpt_options_t *options,
 298                                             int *state);
 299     OPAL_DECLSPEC int opal_cr_inc_core_recover(int state);
 300 
 301 
 302     
 303 
 304 
 305     typedef enum {
 306         OPAL_CR_INC_PRE_CRS_PRE_MPI   = 0,
 307         OPAL_CR_INC_PRE_CRS_POST_MPI  = 1,
 308         OPAL_CR_INC_CRS_PRE_CKPT      = 2,
 309         OPAL_CR_INC_CRS_POST_CKPT     = 3,
 310         OPAL_CR_INC_POST_CRS_PRE_MPI  = 4,
 311         OPAL_CR_INC_POST_CRS_POST_MPI = 5,
 312         OPAL_CR_INC_MAX               = 6
 313     } opal_cr_user_inc_callback_event_t;
 314 
 315     typedef enum {
 316         OPAL_CR_INC_STATE_PREPARE  = 0,
 317         OPAL_CR_INC_STATE_CONTINUE = 1,
 318         OPAL_CR_INC_STATE_RESTART  = 2,
 319         OPAL_CR_INC_STATE_ERROR    = 3
 320     } opal_cr_user_inc_callback_state_t;
 321 
 322     
 323 
 324 
 325     typedef int (*opal_cr_user_inc_callback_fn_t)(opal_cr_user_inc_callback_event_t event,
 326                                                   opal_cr_user_inc_callback_state_t state);
 327 
 328     OPAL_DECLSPEC int opal_cr_user_inc_register_callback
 329                       (opal_cr_user_inc_callback_event_t event,
 330                        opal_cr_user_inc_callback_fn_t  function,
 331                        opal_cr_user_inc_callback_fn_t  *prev_function);
 332 
 333     OPAL_DECLSPEC int ompi_trigger_user_inc_callback(opal_cr_user_inc_callback_event_t event,
 334                                                 opal_cr_user_inc_callback_state_t state);
 335 
 336 
 337     
 338 
 339 
 340     
 341 
 342 
 343     typedef int (*opal_cr_coord_callback_fn_t) (int);
 344 
 345     
 346 
 347 
 348 
 349      OPAL_DECLSPEC int opal_cr_reg_coord_callback
 350      (opal_cr_coord_callback_fn_t  new_func,
 351       opal_cr_coord_callback_fn_t *prev_func);
 352 
 353     
 354 
 355 
 356     OPAL_DECLSPEC int opal_cr_coord(int state);
 357 
 358     
 359 
 360 
 361     OPAL_DECLSPEC void opal_cr_set_time(int idx);
 362     OPAL_DECLSPEC void opal_cr_display_all_timers(void);
 363     OPAL_DECLSPEC void opal_cr_clear_timers(void);
 364 
 365     OPAL_DECLSPEC extern bool opal_cr_timing_enabled;
 366     OPAL_DECLSPEC extern bool opal_cr_timing_barrier_enabled;
 367     OPAL_DECLSPEC extern int  opal_cr_timing_my_rank;
 368     OPAL_DECLSPEC extern int  opal_cr_timing_target_rank;
 369 
 370 
 371 #define OPAL_CR_TIMER_ENTRY0    0
 372 #define OPAL_CR_TIMER_ENTRY1    1
 373 #define OPAL_CR_TIMER_ENTRY2    2
 374 #define OPAL_CR_TIMER_CRCPBR0   3
 375 #define OPAL_CR_TIMER_CRCP0     4
 376 #define OPAL_CR_TIMER_CRCPBR1   5
 377 #define OPAL_CR_TIMER_P2P0      6
 378 #define OPAL_CR_TIMER_P2P1      7
 379 #define OPAL_CR_TIMER_P2PBR0    8
 380 #define OPAL_CR_TIMER_CORE0     9
 381 #define OPAL_CR_TIMER_CORE1    10
 382 #define OPAL_CR_TIMER_COREBR0  11
 383 #define OPAL_CR_TIMER_P2P2     12
 384 #define OPAL_CR_TIMER_P2PBR1   13
 385 #define OPAL_CR_TIMER_P2P3     14
 386 #define OPAL_CR_TIMER_P2PBR2   15
 387 #define OPAL_CR_TIMER_CRCP1    16
 388 #define OPAL_CR_TIMER_COREBR1  17
 389 #define OPAL_CR_TIMER_CORE2    18
 390 #define OPAL_CR_TIMER_ENTRY3   19
 391 #define OPAL_CR_TIMER_ENTRY4   20
 392 #define OPAL_CR_TIMER_MAX      21
 393 
 394 
 395 #define OPAL_CR_CLEAR_TIMERS()                          \
 396     {                                                   \
 397         if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \
 398             opal_cr_clear_timers();                     \
 399         }                                               \
 400     }
 401 
 402 #define OPAL_CR_SET_TIMER(idx)                          \
 403     {                                                   \
 404         if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \
 405             opal_cr_set_time(idx);                      \
 406         }                                               \
 407     }
 408 
 409 #define OPAL_CR_DISPLAY_ALL_TIMERS()                    \
 410     {                                                   \
 411         if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \
 412             opal_cr_display_all_timers();               \
 413         }                                               \
 414     }
 415 
 416 END_C_DECLS
 417 
 418 #endif 
 419