root/opal/runtime/opal_cr.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. opal_cr_set_enabled
  2. opal_cr_register
  3. opal_cr_init
  4. opal_cr_finalize
  5. opal_cr_test_if_checkpoint_ready
  6. opal_cr_inc_core_prep
  7. opal_cr_inc_core_ckpt
  8. opal_cr_inc_core_recover
  9. opal_cr_inc_core
  10. opal_cr_coord
  11. opal_cr_reg_notify_callback
  12. opal_cr_user_inc_register_callback
  13. ompi_trigger_user_inc_callback
  14. opal_cr_reg_coord_callback
  15. opal_cr_refresh_environ
  16. extract_env_vars
  17. opal_cr_sigpipe_debug_signal_handler
  18. opal_cr_thread_fn
  19. opal_cr_thread_init_library
  20. opal_cr_thread_finalize_library
  21. opal_cr_thread_abort_library
  22. opal_cr_thread_enter_library
  23. opal_cr_thread_exit_library
  24. opal_cr_thread_noop_progress
  25. opal_cr_get_time
  26. opal_cr_set_time
  27. opal_cr_clear_timers
  28. display_indv_timer_core
  29. opal_cr_display_all_timers
  30. opal_cr_debug_set_current_ckpt_thread_self
  31. opal_cr_debug_clear_current_ckpt_thread
  32. MPIR_checkpoint_debugger_detach
  33. MPIR_checkpoint_debugger_signal_handler
  34. MPIR_checkpoint_debugger_waitpoint
  35. MPIR_checkpoint_debugger_breakpoint

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2012 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2007-2017 Los Alamos National Security, LLC.  All rights
  13  *                         reserved.
  14  * Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
  15  * Copyright (c) 2012-2013 Cisco Systems, Inc.  All rights reserved.
  16  * Copyright (c) 2015      Research Organization for Information Science
  17  *                         and Technology (RIST). All rights reserved.
  18  * Copyright (c) 2017      IBM Corporation. All rights reserved.
  19  * Copyright (c) 2018      Amazon.com, Inc. or its affiliates.  All Rights reserved.
  20  * $COPYRIGHT$
  21  *
  22  * Additional copyrights may follow
  23  *
  24  * $HEADER$
  25  */
  26 
  27 /** @file
  28  *
  29  * OPAL Layer Checkpoint/Restart Runtime functions
  30  *
  31  */
  32 
  33 #include "opal_config.h"
  34 
  35 #include <string.h>
  36 #include <errno.h>
  37 #ifdef HAVE_UNISTD_H
  38 #include <unistd.h>
  39 #endif  /* HAVE_UNISTD_H */
  40 #ifdef HAVE_FCNTL_H
  41 #include <fcntl.h>
  42 #endif  /* HAVE_FCNTL_H */
  43 #ifdef HAVE_SYS_TYPES_H
  44 #include <sys/types.h>
  45 #endif  /* HAVE_SYS_TYPES_H */
  46 #ifdef HAVE_SYS_STAT_H
  47 #include <sys/stat.h>  /* for mkfifo */
  48 #endif  /* HAVE_SYS_STAT_H */
  49 #include <signal.h>
  50 
  51 #include "opal/class/opal_object.h"
  52 #include "opal/util/opal_environ.h"
  53 #include "opal/util/show_help.h"
  54 #include "opal/util/output.h"
  55 #include "opal/util/malloc.h"
  56 #include "opal/util/keyval_parse.h"
  57 #include "opal/util/opal_environ.h"
  58 #include "opal/util/argv.h"
  59 #include "opal/util/printf.h"
  60 #include "opal/memoryhooks/memory.h"
  61 
  62 #include "opal/mca/base/base.h"
  63 #include "opal/runtime/opal_cr.h"
  64 #include "opal/runtime/opal.h"
  65 #include "opal/constants.h"
  66 
  67 #include "opal/mca/if/base/base.h"
  68 #include "opal/mca/memcpy/base/base.h"
  69 #include "opal/mca/memory/base/base.h"
  70 #include "opal/mca/timer/base/base.h"
  71 
  72 #include "opal/threads/mutex.h"
  73 #include "opal/threads/threads.h"
  74 #include "opal/mca/crs/base/base.h"
  75 
/******************
 * Global Var Decls
 ******************/
#if OPAL_ENABLE_CRDEBUG == 1
/* Array of thread pointers; allocated (all slots NULL) in opal_cr_register()
 * and released in opal_cr_finalize(). */
static opal_thread_t **opal_cr_debug_free_threads = NULL;
/* Number of slots in opal_cr_debug_free_threads. */
static int opal_cr_debug_num_free_threads = 0;
static int opal_cr_debug_threads_already_waiting = false;

/* Backing storage for the "enable_crdebug" MCA parameter. */
int MPIR_debug_with_checkpoint = 0;
static volatile int MPIR_checkpoint_debug_gate = 0;

/* Backing storage for the "crdebug_signal" MCA parameter; opal_cr_init()
 * installs MPIR_checkpoint_debugger_signal_handler on this signal. */
int    opal_cr_debug_signal     = 0;
#endif

/* Stall bookkeeping; both flags are reset to false by opal_cr_init() for
 * non-tool processes. opal_cr_currently_stalled makes
 * opal_cr_test_if_checkpoint_ready() jump straight to the START notification. */
bool opal_cr_stall_check       = false;
bool opal_cr_currently_stalled = false;
/* Output stream id; only opened by opal_cr_init() when opal_cr_verbose != 0. */
int  opal_cr_output = -1;
/* Backing storage for the "verbose" MCA parameter. */
int  opal_cr_verbose = 0;
/* Reference count of opal_cr_init() calls not yet matched by opal_cr_finalize().
 * (The misspelling is part of the exported name.) */
int opal_cr_initalized = 0;

static double opal_cr_get_time(void);
static void display_indv_timer_core(double diff, char *str);
static double timer_start[OPAL_CR_TIMER_MAX];
/* Backing storage for the "enable_timer_barrier" / "enable_timer" /
 * "timer_target_rank" MCA parameters registered in opal_cr_register(). */
bool opal_cr_timing_barrier_enabled = false;
bool opal_cr_timing_enabled = false;
int  opal_cr_timing_my_rank = 0;
int  opal_cr_timing_target_rank = 0;

/******************
 * Local Functions & Var Decls
 ******************/
static int extract_env_vars(int prev_pid, char * file_name);

static void opal_cr_sigpipe_debug_signal_handler (int signo);

/* Currently registered callbacks. cur_coord_callback is set through
 * opal_cr_reg_coord_callback() during opal_cr_init(); the callbacks are
 * invoked without a NULL check by the routines below. */
static opal_cr_user_inc_callback_fn_t cur_user_coord_callback[OPAL_CR_INC_MAX] = {NULL};
static opal_cr_coord_callback_fn_t  cur_coord_callback = NULL;
static opal_cr_notify_callback_fn_t cur_notify_callback = NULL;

/* PID recorded by opal_cr_inc_core_prep() just before the checkpoint; handed
 * to opal_cr_refresh_environ() when the process comes back in restart state. */
static int core_prev_pid = 0;

/******************
 * Interface Functions & Vars
 ******************/
/* Backing storage for the "tmp_dir" MCA parameter; seeded from
 * opal_tmp_directory() and freed in opal_cr_finalize(). */
char * opal_cr_pipe_dir   = NULL;
/* Backing storage for the "signal" MCA parameter (SIGUSR1 by default on
 * non-Windows builds). */
int    opal_cr_entry_point_signal     = 0;
/* NOTE: opal_cr_register() resets this to false before registering the
 * "enabled" parameter, so the initializer here only matters before init. */
bool   opal_cr_is_enabled = true;
/* Tool processes skip callback registration and the C/R thread. */
bool   opal_cr_is_tool    = false;

/* Current checkpoint state */
int    opal_cr_checkpointing_state = OPAL_CR_STATUS_NONE;

/* Current checkpoint request channel state */
int    opal_cr_checkpoint_request  = OPAL_CR_STATUS_NONE;

/* Backing storage for the "debug_sigpipe" MCA parameter. */
static bool   opal_cr_debug_sigpipe = false;

bool opal_cr_continue_like_restart = false;
 134 
#if OPAL_ENABLE_FT_THREAD == 1
/*****************
 * Threading Functions and Variables
 *****************/
static void* opal_cr_thread_fn(opal_object_t *obj);
/* Set to true by opal_cr_finalize() before joining the thread. */
bool    opal_cr_thread_is_done    = false;
bool    opal_cr_thread_is_active  = false;
bool    opal_cr_thread_in_library = false;
/* Backing storage for the "use_thread" MCA parameter (opal_cr_register()
 * resets it to false before registration). */
bool    opal_cr_thread_use_if_avail = true;
/* Combined lock word: bit 0 (ThreadFlag) is claimed by OPAL_CR_THREAD_LOCK(),
 * and each OPAL_CR_LOCK() adds ProcInc to it. */
int32_t opal_cr_thread_num_in_library = 0;
/* Backing storage for the "thread_sleep_check" / "thread_sleep_wait"
 * MCA parameters; both are passed around as usleep()-style intervals. */
int     opal_cr_thread_sleep_check = 0;
int     opal_cr_thread_sleep_wait = 0;
opal_thread_t opal_cr_thread;
opal_mutex_t  opal_cr_thread_lock;
#if 0
/* Disabled mutex-based variant of the locking macros, kept for reference. */
#define OPAL_CR_LOCK()           opal_cr_thread_in_library = true;  opal_mutex_lock(&opal_cr_thread_lock);
#define OPAL_CR_UNLOCK()         opal_cr_thread_in_library = false; opal_mutex_unlock(&opal_cr_thread_lock);
#define OPAL_CR_THREAD_LOCK()    opal_mutex_lock(&opal_cr_thread_lock);
#define OPAL_CR_THREAD_UNLOCK()  opal_mutex_unlock(&opal_cr_thread_lock);
#else
/* This technique will potentially starve the thread, but that is OK since
 * it is only there as support for when the process is not in the MPI library
 */
static const uint32_t ThreadFlag = 0x1;
static const uint32_t ProcInc    = 0x2;

/* Mark the caller as inside the library: set the in-library flag, add ProcInc
 * to the lock word, then yield until the ThreadFlag bit is clear. */
#define OPAL_CR_LOCK()                                            \
 {                                                                \
    opal_cr_thread_in_library = true;                             \
    OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, ProcInc);   \
    while( (opal_cr_thread_num_in_library & ThreadFlag ) != 0 ) { \
      sched_yield();                                              \
    }                                                             \
 }
/* Undo OPAL_CR_LOCK(): subtract ProcInc and clear the in-library flag once
 * the lock word is no longer positive. */
#define OPAL_CR_UNLOCK()                                         \
 {                                                               \
    OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, -ProcInc); \
    if( opal_cr_thread_num_in_library <= 0 ) {                   \
      opal_cr_thread_in_library = false;                         \
    }                                                            \
 }
 176 #define OPAL_CR_THREAD_LOCK()                                           \
 177     {                                                                   \
 178       int32_t _tmp_value = 0;                                           \
 179       while(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 (&opal_cr_thread_num_in_library, &_tmp_value, ThreadFlag)) { \
 180           if( !opal_cr_thread_is_active && opal_cr_thread_is_done) {    \
 181               break;                                                    \
 182           }                                                             \
 183           sched_yield();                                                \
 184           usleep(opal_cr_thread_sleep_check);                           \
 185       }                                                                 \
 186  }
/* Release the lock word taken by OPAL_CR_THREAD_LOCK() by atomically
 * subtracting ThreadFlag from opal_cr_thread_num_in_library. */
#define OPAL_CR_THREAD_UNLOCK()                                     \
 {                                                                  \
    OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, -ThreadFlag); \
 }
 191 #endif
 192 
 193 #endif /* OPAL_ENABLE_FT_THREAD == 1 */
 194 
 195 int opal_cr_set_enabled(bool en)
 196 {
 197     opal_cr_is_enabled = en;
 198     return OPAL_SUCCESS;
 199 }
 200 
 201 static int opal_cr_register (void)
 202 {
 203     int ret;
 204 #if OPAL_ENABLE_CRDEBUG == 1
 205     int t;
 206 #endif
 207 
 208     /*
 209      * Some startup MCA parameters
 210      */
 211     ret = mca_base_var_register ("opal", "opal", "cr", "verbose",
 212                                  "Verbose output level for the runtime OPAL Checkpoint/Restart functionality",
 213                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 214                                  OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL,
 215                                  &opal_cr_verbose);
 216     if (0 > ret) {
 217         return ret;
 218     }
 219 
 220     opal_cr_is_enabled = false;
 221     (void) mca_base_var_register("opal", "ft", "cr", "enabled",
 222                                  "Enable fault tolerance for this program",
 223                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 224                                  OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 225                                  &opal_cr_is_enabled);
 226 
 227     opal_cr_timing_enabled = false;
 228     (void) mca_base_var_register ("opal", "opal", "cr", "enable_timer",
 229                                   "Enable Checkpoint timer (Default: Disabled)",
 230                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 231                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 232                                   &opal_cr_timing_enabled);
 233 
 234     opal_cr_timing_barrier_enabled = false;
 235     (void) mca_base_var_register ("opal", "opal", "cr", "enable_timer_barrier",
 236                                   "Enable Checkpoint timer Barrier. Must have opal_cr_enable_timer set. (Default: Disabled)",
 237                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, opal_cr_timing_enabled ? MCA_BASE_VAR_FLAG_SETTABLE : 0,
 238                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 239                                   &opal_cr_timing_barrier_enabled);
 240     opal_cr_timing_barrier_enabled = opal_cr_timing_barrier_enabled && opal_cr_timing_enabled;
 241 
 242     (void) mca_base_var_register ("opal", "opal", "cr", "timer_target_rank",
 243                                   "Target Rank for the timer (Default: 0)",
 244                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 245                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 246                                   &opal_cr_timing_target_rank);
 247 
 248 #if OPAL_ENABLE_FT_THREAD == 1
 249     opal_cr_thread_use_if_avail = false;
 250     (void) mca_base_var_register ("opal", "opal", "cr", "use_thread",
 251                                   "Use an async thread to checkpoint this program (Default: Disabled)",
 252                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 253                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 254                                   &opal_cr_thread_use_if_avail);
 255 
 256     opal_cr_thread_sleep_check = 0;
 257     (void) mca_base_var_register ("opal", "opal", "cr", "thread_sleep_check",
 258                                   "Time to sleep between checking for a checkpoint (Default: 0)",
 259                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 260                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 261                                   &opal_cr_thread_sleep_check);
 262 
 263     opal_cr_thread_sleep_wait = 100;
 264     (void) mca_base_var_register ("opal", "opal", "cr", "thread_sleep_wait",
 265                                   "Time to sleep waiting for process to exit MPI library (Default: 1000)",
 266                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 267                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 268                                   &opal_cr_thread_sleep_wait);
 269 #endif
 270 
 271     opal_cr_is_tool = false;
 272     (void) mca_base_var_register ("opal", "opal", "cr", "is_tool",
 273                                   "Is this a tool program, meaning does it require a fully operational OPAL or just enough to exec.",
 274                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 275                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 276                                   &opal_cr_is_tool);
 277 
 278 #ifndef __WINDOWS__
 279     opal_cr_entry_point_signal = SIGUSR1;
 280     (void) mca_base_var_register ("opal", "opal", "cr", "signal",
 281                                   "Checkpoint/Restart signal used to initialize an OPAL Only checkpoint of a program",
 282                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 283                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 284                                   &opal_cr_entry_point_signal);
 285 
 286     opal_cr_debug_sigpipe = false;
 287     (void) mca_base_var_register ("opal", "opal", "cr", "debug_sigpipe",
 288                                   "Activate a signal handler for debugging SIGPIPE Errors that can happen on restart. (Default: Disabled)",
 289                                   MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 290                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 291                                   &opal_cr_debug_sigpipe);
 292 #else
 293     opal_cr_is_tool = true;  /* no support for CR on Windows yet */
 294 #endif  /* __WINDOWS__ */
 295 
 296 #if OPAL_ENABLE_CRDEBUG == 1
 297     MPIR_debug_with_checkpoint = 0;
 298     (void) mca_base_var_register ("opal", "opal", "cr", "enable_crdebug",
 299                                   "Enable checkpoint/restart debugging",
 300                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 301                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 302                                   &MPIR_debug_with_checkpoint);
 303 
 304     opal_cr_debug_num_free_threads = 3;
 305     opal_cr_debug_free_threads = (opal_thread_t **)malloc(sizeof(opal_thread_t *) * opal_cr_debug_num_free_threads );
 306     for(t = 0; t < opal_cr_debug_num_free_threads; ++t ) {
 307         opal_cr_debug_free_threads[t] = NULL;
 308     }
 309 
 310     opal_cr_debug_signal = SIGTSTP;
 311     (void) mca_base_var_register ("opal", "opal", "cr", "crdebug_signal",
 312                                   "Checkpoint/Restart signal used to hold threads when debugging",
 313                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 314                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 315                                   &opal_cr_debug_signal);
 316 #endif
 317 
 318     opal_cr_pipe_dir = (char *) opal_tmp_directory();
 319     (void) mca_base_var_register ("opal", "opal", "cr", "tmp_dir",
 320                                   "Temporary directory to place rendezvous files for a checkpoint",
 321                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 322                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 323                                   &opal_cr_pipe_dir);
 324 
 325     return OPAL_SUCCESS;
 326 }
 327 
 328 
 329 int opal_cr_init(void )
 330 {
 331     int ret, exit_status = OPAL_SUCCESS;
 332     opal_cr_coord_callback_fn_t prev_coord_func;
 333 
 334     if( ++opal_cr_initalized != 1 ) {
 335         if( opal_cr_initalized < 1 ) {
 336             exit_status = OPAL_ERROR;
 337             goto cleanup;
 338         }
 339         exit_status = OPAL_SUCCESS;
 340         goto cleanup;
 341     }
 342 
 343     ret = opal_cr_register ();
 344     if (OPAL_SUCCESS != ret) {
 345         return ret;
 346     }
 347 
 348     if(0 != opal_cr_verbose) {
 349         opal_cr_output = opal_output_open(NULL);
 350         opal_output_set_verbosity(opal_cr_output, opal_cr_verbose);
 351     }
 352 
 353     opal_output_verbose(10, opal_cr_output,
 354                         "opal_cr: init: Verbose Level: %d",
 355                         opal_cr_verbose);
 356 
 357 
 358     opal_output_verbose(10, opal_cr_output,
 359                         "opal_cr: init: FT Enabled: %s",
 360                         opal_cr_is_enabled ? "true" : "false");
 361 
 362 
 363     opal_output_verbose(10, opal_cr_output,
 364                         "opal_cr: init: Is a tool program: %s",
 365                         opal_cr_is_tool ? "true" : "false");
 366 
 367     opal_output_verbose(10, opal_cr_output,
 368                         "opal_cr: init: Debug SIGPIPE: %d (%s)",
 369                         opal_cr_verbose, (opal_cr_debug_sigpipe ? "True" : "False"));
 370 
 371     opal_output_verbose(10, opal_cr_output,
 372                         "opal_cr: init: Checkpoint Signal: %d",
 373                         opal_cr_entry_point_signal);
 374 
 375 #if OPAL_ENABLE_FT_THREAD == 1
 376     opal_output_verbose(10, opal_cr_output,
 377                         "opal_cr: init: FT Use thread: %s",
 378                         opal_cr_thread_use_if_avail ? "true" : "false");
 379 
 380     opal_output_verbose(10, opal_cr_output,
 381                         "opal_cr: init: FT thread sleep: check = %d, wait = %d",
 382                         opal_cr_thread_sleep_check, opal_cr_thread_sleep_wait);
 383 
 384     /* If we have a thread, then attach the SIGPIPE signal handler there since
 385      * it is most likely to be the one that needs it.
 386      */
 387     if( opal_cr_debug_sigpipe && !opal_cr_thread_use_if_avail ) {
 388         if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
 389             ;
 390         }
 391     }
 392 #else
 393     if( opal_cr_debug_sigpipe ) {
 394         if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
 395             ;
 396         }
 397     }
 398 #endif
 399 
 400 #if OPAL_ENABLE_CRDEBUG == 1
 401     opal_output_verbose(10, opal_cr_output,
 402                         "opal_cr: init: C/R Debugging Enabled [%s]\n",
 403                         (MPIR_debug_with_checkpoint ? "True": "False"));
 404 
 405     opal_output_verbose(10, opal_cr_output,
 406                         "opal_cr: init: Checkpoint Signal (Debug): %d",
 407                         opal_cr_debug_signal);
 408 
 409     if( SIG_ERR == signal(opal_cr_debug_signal, MPIR_checkpoint_debugger_signal_handler) ) {
 410         opal_output(opal_cr_output,
 411                     "opal_cr: init: Failed to register C/R debug signal (%d)",
 412                     opal_cr_debug_signal);
 413     }
 414 #endif
 415 
 416     opal_output_verbose(10, opal_cr_output,
 417                         "opal_cr: init: Temp Directory: %s",
 418                         opal_cr_pipe_dir);
 419 
 420     if( !opal_cr_is_tool ) {
 421         /* Register the OPAL interlevel coordination callback */
 422         opal_cr_reg_coord_callback(opal_cr_coord, &prev_coord_func);
 423 
 424         opal_cr_stall_check = false;
 425         opal_cr_currently_stalled = false;
 426 
 427     } /* End opal_cr_is_tool = true */
 428 
 429     /*
 430      * If fault tolerance was not compiled in then
 431      * we need to make sure that the listener thread is active to tell
 432      * the tools that this is not a checkpointable job.
 433      * We don't need the CRS framework to be initalized.
 434      */
 435 #if OPAL_ENABLE_FT_CR    == 1
 436     /*
 437      * Open the checkpoint / restart service components
 438      */
 439     if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_crs_base_framework, 0))) {
 440         opal_show_help( "help-opal-runtime.txt",
 441                         "opal_cr_init:no-crs", true,
 442                         "opal_crs_base_open", ret );
 443         exit_status = ret;
 444         goto cleanup;
 445     }
 446 
 447     if (OPAL_SUCCESS != (ret = opal_crs_base_select())) {
 448         opal_show_help( "help-opal-runtime.txt",
 449                         "opal_cr_init:no-crs", true,
 450                         "opal_crs_base_select", ret );
 451         exit_status = ret;
 452         goto cleanup;
 453     }
 454 #endif
 455 
 456 #if OPAL_ENABLE_FT_THREAD == 1
 457     if( !opal_cr_is_tool && opal_cr_thread_use_if_avail) {
 458         opal_output_verbose(10, opal_cr_output,
 459                             "opal_cr: init: starting the thread\n");
 460 
 461         /* JJH: We really do need this line below since it enables
 462          *      actual locks for threads. However currently the
 463          *      upper layers will deadlock if it is enabled.
 464          *      So hack around the problem for now, while working
 465          *      on a complete solution. See ticket #2741 for more
 466          *      details.
 467          * opal_set_using_threads(true);
 468          */
 469 
 470         /*
 471          * Start the thread
 472          */
 473         OBJ_CONSTRUCT(&opal_cr_thread,     opal_thread_t);
 474         OBJ_CONSTRUCT(&opal_cr_thread_lock, opal_mutex_t);
 475 
 476         opal_cr_thread_is_done    = false;
 477         opal_cr_thread_is_active  = false;
 478         opal_cr_thread_in_library = false;
 479         opal_cr_thread_num_in_library = 0;
 480 
 481         opal_cr_thread.t_run = opal_cr_thread_fn;
 482         opal_cr_thread.t_arg = NULL;
 483         opal_thread_start(&opal_cr_thread);
 484 
 485     } /* End opal_cr_is_tool = true */
 486     else {
 487         opal_output_verbose(10, opal_cr_output,
 488                             "opal_cr: init: *Not* Using C/R thread\n");
 489     }
 490 #endif /* OPAL_ENABLE_FT_THREAD == 1 */
 491 
 492  cleanup:
 493     return exit_status;
 494 }
 495 
 496 int opal_cr_finalize(void)
 497 {
 498     int exit_status = OPAL_SUCCESS;
 499 
 500     if( --opal_cr_initalized != 0 ) {
 501         if( opal_cr_initalized < 0 ) {
 502             return OPAL_ERROR;
 503         }
 504         return OPAL_SUCCESS;
 505     }
 506 
 507     if( !opal_cr_is_tool ) {
 508 #if OPAL_ENABLE_FT_THREAD == 1
 509         if( opal_cr_thread_use_if_avail ) {
 510             void *data;
 511             /*
 512              * Stop the thread
 513              */
 514             opal_cr_thread_is_done    = true;
 515             opal_cr_thread_is_active  = false;
 516             opal_cr_thread_in_library = true;
 517 
 518             opal_thread_join(&opal_cr_thread, &data);
 519             OBJ_DESTRUCT(&opal_cr_thread);
 520             OBJ_DESTRUCT(&opal_cr_thread_lock);
 521         }
 522 #endif /* OPAL_ENABLE_FT_THREAD == 1 */
 523 
 524         /* Nothing to do for just process notifications */
 525         opal_cr_checkpointing_state = OPAL_CR_STATUS_TERM;
 526         opal_cr_checkpoint_request  = OPAL_CR_STATUS_TERM;
 527     }
 528 
 529 #if OPAL_ENABLE_CRDEBUG == 1
 530     if( NULL != opal_cr_debug_free_threads ) {
 531         free( opal_cr_debug_free_threads );
 532         opal_cr_debug_free_threads = NULL;
 533     }
 534     opal_cr_debug_num_free_threads = 0;
 535 #endif
 536 
 537     if (NULL != opal_cr_pipe_dir) {
 538         free(opal_cr_pipe_dir);
 539         opal_cr_pipe_dir = NULL;
 540     }
 541 
 542 #if OPAL_ENABLE_FT_CR    == 1
 543     /*
 544      * Close the checkpoint / restart service components
 545      */
 546     (void) mca_base_framework_close(&opal_crs_base_framework);
 547 #endif
 548 
 549     return exit_status;
 550 }
 551 
 552 /*
 553  * Check if a checkpoint request needs to be operated upon
 554  */
 555 void opal_cr_test_if_checkpoint_ready(void)
 556 {
 557     int ret;
 558 
 559     if( opal_cr_currently_stalled) {
 560         opal_output_verbose(20, opal_cr_output,
 561                             "opal_cr:opal_test_if_ready: JUMPING to Post Stall stage");
 562         goto STAGE_1;
 563     }
 564 
 565     /*
 566      * If there is no checkpoint request to act on
 567      * then just return
 568      */
 569     if(OPAL_CR_STATUS_REQUESTED != opal_cr_checkpoint_request ) {
 570         return;
 571     }
 572 
 573     /*
 574      * If we are currently checkpointing:
 575      *  - If a request is pending then cancel it
 576      *  - o.w., skip it.
 577      */
 578     if(OPAL_CR_STATUS_RUNNING == opal_cr_checkpointing_state ) {
 579         if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_IN_PROGRESS) ) ) {
 580             opal_output(opal_cr_output,
 581                         "Error: opal_cr: test_if_checkpoint_ready: Respond [In Progress] Failed. (%d)",
 582                         ret);
 583         }
 584         opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE;
 585         return;
 586     }
 587 
 588     /*
 589      * If no CRS module is loaded return an error
 590      */
 591     if (NULL == opal_crs.crs_checkpoint ) {
 592          if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_NULL) ) ) {
 593              opal_output(opal_cr_output,
 594                          "Error: opal_cr: test_if_checkpoint_ready: Respond [Not Able/NULL] Failed. (%d)",
 595                          ret);
 596          }
 597          opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE;
 598          return;
 599     }
 600 
 601     /*
 602      * Start the checkpoint
 603      */
 604     opal_cr_checkpointing_state = OPAL_CR_STATUS_RUNNING;
 605     opal_cr_checkpoint_request  = OPAL_CR_STATUS_NONE;
 606 
 607  STAGE_1:
 608     if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_START) ) ) {
 609         opal_output(opal_cr_output,
 610                     "Error: opal_cr: test_if_checkpoint_ready: Respond [Start Ckpt] Failed. (%d)",
 611                     ret);
 612     }
 613 
 614     return;
 615 }
 616 
 617 /*******************************
 618  * Notification Routines
 619  *******************************/
 620 int opal_cr_inc_core_prep(void)
 621 {
 622     int ret;
 623 
 624     /*
 625      * Call User Level INC
 626      */
 627     if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_PRE_CRS_PRE_MPI,
 628                                                         OPAL_CR_INC_STATE_PREPARE)) ) {
 629         return ret;
 630     }
 631 
 632     /*
 633      * Use the registered coordination routine
 634      */
 635     if(OPAL_SUCCESS != (ret = cur_coord_callback(OPAL_CRS_CHECKPOINT)) ) {
 636         if ( OPAL_EXISTS != ret ) {
 637             opal_output(opal_cr_output,
 638                         "opal_cr: inc_core: Error: cur_coord_callback(%d) failed! %d\n",
 639                         OPAL_CRS_CHECKPOINT, ret);
 640         }
 641         return ret;
 642     }
 643 
 644     /*
 645      * Call User Level INC
 646      */
 647     if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_PRE_CRS_POST_MPI,
 648                                                         OPAL_CR_INC_STATE_PREPARE)) ) {
 649         return ret;
 650     }
 651 
 652     core_prev_pid = getpid();
 653 
 654     return OPAL_SUCCESS;
 655 }
 656 
 657 int opal_cr_inc_core_ckpt(pid_t pid,
 658                           opal_crs_base_snapshot_t *snapshot,
 659                           opal_crs_base_ckpt_options_t *options,
 660                           int *state)
 661 {
 662     int ret, exit_status = OPAL_SUCCESS;
 663 
 664     OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE0);
 665     if(OPAL_SUCCESS != (ret = opal_crs.crs_checkpoint(pid,
 666                                                       snapshot,
 667                                                       options,
 668                                                       (opal_crs_state_type_t *)state))) {
 669         opal_output(opal_cr_output,
 670                     "opal_cr: inc_core: Error: The checkpoint failed. %d\n", ret);
 671         exit_status = ret;
 672     }
 673 
 674     if(*state == OPAL_CRS_CONTINUE) {
 675         OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE1);
 676 
 677         if(options->term) {
 678             *state = OPAL_CRS_TERM;
 679             opal_cr_checkpointing_state  = OPAL_CR_STATUS_TERM;
 680         } else {
 681             opal_cr_checkpointing_state  = OPAL_CR_STATUS_CONTINUE;
 682         }
 683     }
 684     else {
 685         options->term = false;
 686     }
 687 
 688     /*
 689      * If restarting read environment stuff that opal-restart left us.
 690      */
 691     if(*state == OPAL_CRS_RESTART) {
 692         opal_cr_refresh_environ(core_prev_pid);
 693         opal_cr_checkpointing_state  = OPAL_CR_STATUS_RESTART_PRE;
 694     }
 695 
 696     return exit_status;
 697 }
 698 
 699 int opal_cr_inc_core_recover(int state)
 700 {
 701     int ret;
 702     opal_cr_user_inc_callback_state_t cb_state;
 703 
 704     if( opal_cr_checkpointing_state != OPAL_CR_STATUS_TERM &&
 705         opal_cr_checkpointing_state != OPAL_CR_STATUS_CONTINUE &&
 706         opal_cr_checkpointing_state != OPAL_CR_STATUS_RESTART_PRE &&
 707         opal_cr_checkpointing_state != OPAL_CR_STATUS_RESTART_POST ) {
 708 
 709         if(state == OPAL_CRS_CONTINUE) {
 710             OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE1);
 711             opal_cr_checkpointing_state  = OPAL_CR_STATUS_CONTINUE;
 712         }
 713         /*
 714          * If restarting read environment stuff that opal-restart left us.
 715          */
 716         else if(state == OPAL_CRS_RESTART) {
 717             opal_cr_refresh_environ(core_prev_pid);
 718             opal_cr_checkpointing_state  = OPAL_CR_STATUS_RESTART_PRE;
 719         }
 720     }
 721 
 722     /*
 723      * Call User Level INC
 724      */
 725     if( OPAL_CRS_CONTINUE == state ) {
 726         cb_state = OPAL_CR_INC_STATE_CONTINUE;
 727     }
 728     else if( OPAL_CRS_RESTART == state ) {
 729         cb_state = OPAL_CR_INC_STATE_RESTART;
 730     }
 731     else {
 732         cb_state = OPAL_CR_INC_STATE_ERROR;
 733     }
 734 
 735     if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_POST_CRS_PRE_MPI,
 736                                                         cb_state)) ) {
 737         return ret;
 738     }
 739 
 740     /*
 741      * Use the registered coordination routine
 742      */
 743     if(OPAL_SUCCESS != (ret = cur_coord_callback(state)) ) {
 744         if ( OPAL_EXISTS != ret ) {
 745             opal_output(opal_cr_output,
 746                         "opal_cr: inc_core: Error: cur_coord_callback(%d) failed! %d\n",
 747                         state, ret);
 748         }
 749         return ret;
 750     }
 751 
 752     if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_POST_CRS_POST_MPI,
 753                                                         cb_state)) ) {
 754         return ret;
 755     }
 756 
 757 #if OPAL_ENABLE_CRDEBUG == 1
 758     opal_cr_debug_clear_current_ckpt_thread();
 759 #endif
 760 
 761     return OPAL_SUCCESS;
 762 }
 763 
 764 int opal_cr_inc_core(pid_t pid,
 765                      opal_crs_base_snapshot_t *snapshot,
 766                      opal_crs_base_ckpt_options_t *options,
 767                      int *state)
 768 {
 769     int ret, exit_status = OPAL_SUCCESS;
 770 
 771     /*
 772      * INC: Prepare stack using the registered coordination routine
 773      */
 774     if(OPAL_SUCCESS != (ret = opal_cr_inc_core_prep() ) ) {
 775         return ret;
 776     }
 777 
 778     /*
 779      * INC: Take the checkpoint
 780      */
 781     if(OPAL_SUCCESS != (ret = opal_cr_inc_core_ckpt(pid, snapshot, options, state) ) ) {
 782         exit_status = ret;
 783         /* Don't return here since we want to restart the OPAL level stuff */
 784     }
 785 
 786     /*
 787      * INC: Recover stack using the registered coordination routine
 788      */
 789     if(OPAL_SUCCESS != (ret = opal_cr_inc_core_recover(*state) ) ) {
 790         return ret;
 791     }
 792 
 793     return exit_status;
 794 }
 795 
 796 /*******************************
 797  * Coordination Routines
 798  *******************************/
 799 /**
 800  * Current Coordination callback routines
 801  */
 802 int opal_cr_coord(int state)
 803 {
 804     if(OPAL_CRS_CHECKPOINT == state) {
 805         /* Do Checkpoint Phase work */
 806     }
 807     else if (OPAL_CRS_CONTINUE == state ) {
 808         /* Do Continue Phase work */
 809     }
 810     else if (OPAL_CRS_RESTART == state ) {
 811         /* Do Restart Phase work */
 812 
 813         /*
 814          * Re-initialize the event engine
 815          * Otherwise it may/will use stale file descriptors which will disrupt
 816          * the intended users of the soon-to-be newly assigned file descriptors.
 817          */
 818         opal_event_reinit(opal_sync_event_base);
 819 
 820         /*
 821          * Flush if() functionality, since it caches system specific info.
 822          */
 823         (void) mca_base_framework_close(&opal_if_base_framework);
 824         /* Since opal_ifinit() is not exposed, the necessary
 825          * functions will call it when needed. Just make sure we
 826          * finalized this code so we don't get old socket addrs.
 827          */
 828         opal_output_reopen_all();
 829     }
 830     else if (OPAL_CRS_TERM == state ) {
 831         /* Do Continue Phase work in prep to terminate the application */
 832     }
 833     else {
 834         /* We must have been in an error state from the checkpoint
 835          * recreate everything, as in the Continue Phase
 836          */
 837     }
 838 
 839     /*
 840      * Here we are returning to either:
 841      *  - [orte | ompi]_notify()
 842      */
 843     opal_cr_checkpointing_state  = OPAL_CR_STATUS_RESTART_POST;
 844 
 845     return OPAL_SUCCESS;
 846 }
 847 
 848 int opal_cr_reg_notify_callback(opal_cr_notify_callback_fn_t  new_func,
 849                                 opal_cr_notify_callback_fn_t *prev_func)
 850 {
 851     /*
 852      * Preserve the previous callback
 853      */
 854     if( NULL != cur_notify_callback) {
 855         *prev_func = cur_notify_callback;
 856     }
 857     else {
 858         *prev_func = NULL;
 859     }
 860 
 861     /*
 862      * Update the callbacks
 863      */
 864     cur_notify_callback     = new_func;
 865 
 866     return OPAL_SUCCESS;
 867 }
 868 
 869 int opal_cr_user_inc_register_callback(opal_cr_user_inc_callback_event_t event,
 870                                        opal_cr_user_inc_callback_fn_t  function,
 871                                        opal_cr_user_inc_callback_fn_t  *prev_function)
 872 {
 873     if (event >= OPAL_CR_INC_MAX) {
 874         return OPAL_ERROR;
 875     }
 876 
 877     if( NULL != cur_user_coord_callback[event] ) {
 878         *prev_function = cur_user_coord_callback[event];
 879     } else {
 880         *prev_function = NULL;
 881     }
 882 
 883     cur_user_coord_callback[event] = function;
 884 
 885     return OPAL_SUCCESS;
 886 }
 887 
 888 int ompi_trigger_user_inc_callback(opal_cr_user_inc_callback_event_t event,
 889                               opal_cr_user_inc_callback_state_t state)
 890 {
 891     if( NULL == cur_user_coord_callback[event] ) {
 892         return OPAL_SUCCESS;
 893     }
 894 
 895     if (event >= OPAL_CR_INC_MAX) {
 896         return OPAL_ERROR;
 897     }
 898 
 899     return ((cur_user_coord_callback[event])(event, state));
 900 }
 901 
 902 int opal_cr_reg_coord_callback(opal_cr_coord_callback_fn_t  new_func,
 903                                opal_cr_coord_callback_fn_t *prev_func)
 904 {
 905     /*
 906      * Preserve the previous callback
 907      */
 908     if( NULL != cur_coord_callback) {
 909         *prev_func = cur_coord_callback;
 910     }
 911     else {
 912         *prev_func = NULL;
 913     }
 914 
 915     /*
 916      * Update the callbacks
 917      */
 918     cur_coord_callback     = new_func;
 919 
 920     return OPAL_SUCCESS;
 921 }
 922 
 923 int opal_cr_refresh_environ(int prev_pid) {
 924     char *file_name;
 925 #if OPAL_ENABLE_CRDEBUG == 1
 926     char *tmp;
 927 #endif
 928     struct stat file_status;
 929 
 930     if( 0 >= prev_pid ) {
 931         prev_pid = getpid();
 932     }
 933 
 934     /*
 935      * Make sure the file exists. If it doesn't then this means 2 things:
 936      *  1) We have already executed this function, and
 937      *  2) The file has been deleted on the previous round.
 938      */
 939     opal_asprintf(&file_name, "%s/%s-%d", opal_tmp_directory(), OPAL_CR_BASE_ENV_NAME, prev_pid);
 940     if (NULL == file_name) {
 941         return OPAL_ERR_OUT_OF_RESOURCE;
 942     }
 943     if(0 != stat(file_name, &file_status) ){
 944         free(file_name);
 945         return OPAL_SUCCESS;
 946     }
 947 
 948 #if OPAL_ENABLE_CRDEBUG == 1
 949     mca_base_var_env_name ("opal_cr_enable_crdebug", &tmp);
 950     opal_unsetenv(tmp, &environ);
 951     free (tmp);
 952 #endif
 953 
 954     extract_env_vars(prev_pid, file_name);
 955 
 956 #if OPAL_ENABLE_CRDEBUG == 1
 957     MPIR_debug_with_checkpoint = 0;
 958     (void) mca_base_var_register ("opal", "opal", "cr", "enable_crdebug",
 959                                   "Enable checkpoint/restart debugging",
 960                                   MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 961                                   OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
 962                                   &MPIR_debug_with_checkpoint);
 963 
 964     opal_output_verbose(10, opal_cr_output,
 965                         "opal_cr: init: C/R Debugging Enabled [%s] (refresh)\n",
 966                         (MPIR_debug_with_checkpoint ? "True": "False"));
 967 #endif
 968 
 969     free(file_name);
 970 
 971     return OPAL_SUCCESS;
 972 }
 973 
 974 /*
 975  * Extract environment variables from a saved file
 976  * and place them in the environment.
 977  */
 978 static int extract_env_vars(int prev_pid, char * file_name)
 979 {
 980     int exit_status = OPAL_SUCCESS;
 981     FILE *env_data = NULL;
 982     int len = OPAL_PATH_MAX;
 983     char * tmp_str = NULL;
 984 
 985     if( 0 >= prev_pid ) {
 986         opal_output(opal_cr_output,
 987                     "opal_cr: extract_env_vars: Invalid PID (%d)\n",
 988                     prev_pid);
 989         exit_status = OPAL_ERROR;
 990         goto cleanup;
 991     }
 992 
 993     if (NULL == (env_data = fopen(file_name, "r")) ) {
 994         exit_status = OPAL_ERROR;
 995         goto cleanup;
 996     }
 997 
 998     tmp_str = (char *) malloc(sizeof(char) * OPAL_PATH_MAX);
 999     if( NULL == tmp_str) {
1000         exit_status = OPAL_ERR_OUT_OF_RESOURCE;
1001         goto cleanup;
1002     }
1003     /* Extract an env var */
1004     while(!feof(env_data) ) {
1005         char **t_set = NULL;
1006 
1007         if( NULL == fgets(tmp_str, OPAL_PATH_MAX, env_data) ) {
1008             exit_status = OPAL_ERROR;
1009             goto cleanup;
1010         }
1011         len = strlen(tmp_str);
1012         if(tmp_str[len - 1] == '\n') {
1013             tmp_str[len - 1] = '\0';
1014         } else {
1015             opal_output(opal_cr_output,
1016                         "opal_cr: extract_env_vars: Error: Parameter too long (%s)\n",
1017                         tmp_str);
1018             continue;
1019         }
1020 
1021         if( NULL == (t_set = opal_argv_split(tmp_str, '=')) ) {
1022             break;
1023         }
1024 
1025         opal_setenv(t_set[0], t_set[1], true, &environ);
1026 
1027         opal_argv_free(t_set);
1028     }
1029 
1030  cleanup:
1031     if( NULL != env_data ) {
1032         fclose(env_data);
1033     }
1034     unlink(file_name);
1035 
1036     if( NULL != tmp_str ){
1037         free(tmp_str);
1038     }
1039 
1040     return exit_status;
1041 }
1042 
1043 /*****************************************
1044  * OPAL CR Entry Point Functionality
1045 *****************************************/
1046 /*
1047  * Used only for debugging SIGPIPE problems
1048  */
1049 static void opal_cr_sigpipe_debug_signal_handler (int signo)
1050 {
1051     int sleeper = 1;
1052 
1053     if( !opal_cr_debug_sigpipe ) {
1054         opal_output_verbose(10, opal_cr_output,
1055                             "opal_cr: sigpipe_debug: Debug SIGPIPE Not enabled :(\n");
1056         return;
1057     }
1058 
1059     opal_output(0,
1060                 "opal_cr: sigpipe_debug: Debug SIGPIPE [%d]: PID (%d)\n",
1061                 signo, getpid());
1062     while(sleeper == 1 ) {
1063         sleep(1);
1064     }
1065 }
1066 
1067 #if OPAL_ENABLE_FT_THREAD == 1
1068 static void* opal_cr_thread_fn(opal_object_t *obj)
1069 {
1070     /* Sanity Check */
1071     if( !opal_cr_thread_use_if_avail ) {
1072         return NULL;
1073     }
1074 
1075     if( opal_cr_debug_sigpipe ) {
1076         if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
1077             ;
1078         }
1079     }
1080 
1081     /*
1082      * Register this thread with the OPAL CRS
1083      */
1084     if( NULL != opal_crs.crs_reg_thread ) {
1085         if( OPAL_SUCCESS != opal_crs.crs_reg_thread() ) {
1086             opal_output(0, "Error: Thread registration failed\n");
1087             return NULL;
1088         }
1089     }
1090 
1091 #if OPAL_ENABLE_CRDEBUG == 1
1092     opal_cr_debug_free_threads[1] = opal_thread_get_self();
1093 #endif
1094 
1095     /*
1096      * Wait to become active
1097      */
1098     while( !opal_cr_thread_is_active && !opal_cr_thread_is_done) {
1099         sched_yield();
1100     }
1101 
1102     if( opal_cr_thread_is_done ) {
1103         return NULL;
1104     }
1105 
1106     /*
1107      * While active
1108      */
1109     while( opal_cr_thread_is_active && !opal_cr_thread_is_done) {
1110         /*
1111          * While no threads are in the MPI library then try to process
1112          * checkpoint requests.
1113          */
1114         OPAL_CR_THREAD_LOCK();
1115 
1116         while ( !opal_cr_thread_in_library ) {
1117             sched_yield();
1118             usleep(opal_cr_thread_sleep_check);
1119 
1120             OPAL_CR_TEST_CHECKPOINT_READY();
1121             /* Sanity check */
1122             if( OPAL_UNLIKELY(opal_cr_currently_stalled) ) {
1123                 OPAL_CR_TEST_CHECKPOINT_READY();
1124             }
1125         }
1126 
1127         /*
1128          * While they are in the MPI library yield
1129          */
1130         OPAL_CR_THREAD_UNLOCK();
1131 
1132         while ( opal_cr_thread_in_library && opal_cr_thread_is_active ) {
1133             usleep(opal_cr_thread_sleep_wait);
1134         }
1135     }
1136 
1137     return NULL;
1138 }
1139 
1140 void opal_cr_thread_init_library(void)
1141 {
1142     if( !opal_cr_thread_use_if_avail ) {
1143         OPAL_CR_TEST_CHECKPOINT_READY();
1144     } else {
1145         /* Activate the CR Thread */
1146         opal_cr_thread_in_library = false;
1147         opal_cr_thread_is_done    = false;
1148         opal_cr_thread_is_active  = true;
1149     }
1150 }
1151 
1152 void opal_cr_thread_finalize_library(void)
1153 {
1154     if( !opal_cr_thread_use_if_avail ) {
1155         OPAL_CR_TEST_CHECKPOINT_READY();
1156     } else {
1157         /* Deactivate the CR Thread */
1158         opal_cr_thread_is_done    = true;
1159         opal_cr_thread_is_active  = false;
1160         OPAL_CR_LOCK();
1161         opal_cr_thread_in_library = true;
1162     }
1163 }
1164 
1165 void opal_cr_thread_abort_library(void)
1166 {
1167     if( !opal_cr_thread_use_if_avail ) {
1168         OPAL_CR_TEST_CHECKPOINT_READY();
1169     } else {
1170         /* Deactivate the CR Thread */
1171         opal_cr_thread_is_done    = true;
1172         opal_cr_thread_is_active  = false;
1173         OPAL_CR_LOCK();
1174         opal_cr_thread_in_library = true;
1175     }
1176 }
1177 
1178 void opal_cr_thread_enter_library(void)
1179 {
1180     if( !opal_cr_thread_use_if_avail ) {
1181         OPAL_CR_TEST_CHECKPOINT_READY();
1182     } else {
1183         /* Lock out the CR Thread */
1184         OPAL_CR_LOCK();
1185     }
1186 }
1187 
1188 void opal_cr_thread_exit_library(void)
1189 {
1190     if( !opal_cr_thread_use_if_avail ) {
1191         OPAL_CR_TEST_CHECKPOINT_READY();
1192     } else {
1193         /* Allow CR Thread to continue */
1194         OPAL_CR_UNLOCK();
1195     }
1196 }
1197 
1198 void opal_cr_thread_noop_progress(void)
1199 {
1200     if( !opal_cr_thread_use_if_avail ) {
1201         OPAL_CR_TEST_CHECKPOINT_READY();
1202     }
1203 }
1204 
1205 #endif /* OPAL_ENABLE_FT_THREAD == 1 */
1206 
/*
 * Return the current time in seconds as a double.
 *
 * Uses opal_timer_base_get_usec() when OPAL_TIMER_USEC_NATIVE is set,
 * and gettimeofday() otherwise.
 */
static double opal_cr_get_time() {
#if OPAL_TIMER_USEC_NATIVE
    return (double)opal_timer_base_get_usec() / 1000000.0;
#else
    struct timeval now;

    gettimeofday(&now, NULL);

    /* whole seconds plus the microsecond fraction */
    return (double)now.tv_sec + (double)now.tv_usec / 1000000.0;
#endif
}
1221 
1222 void opal_cr_set_time(int idx)
1223 {
1224     if(idx < OPAL_CR_TIMER_MAX ) {
1225         if( timer_start[idx] <= 0.0 ) {
1226             timer_start[idx] = opal_cr_get_time();
1227         }
1228     }
1229 }
1230 
1231 void opal_cr_clear_timers(void)
1232 {
1233     int i;
1234     for(i = 0; i < OPAL_CR_TIMER_MAX; ++i) {
1235         timer_start[i] = 0.0;
1236     }
1237 }
1238 
1239 static void display_indv_timer_core(double diff, char *str) {
1240     double total = 0;
1241     double perc  = 0;
1242 
1243     total = timer_start[OPAL_CR_TIMER_MAX-1] - timer_start[OPAL_CR_TIMER_ENTRY0];
1244     perc = (diff/total) * 100;
1245 
1246     opal_output(0,
1247                 "opal_cr: timing: %-20s = %10.2f s\t%10.2f s\t%6.2f\n",
1248                 str,
1249                 diff,
1250                 total,
1251                 perc);
1252     return;
1253 }
1254 
1255 void opal_cr_display_all_timers(void)
1256 {
1257     double diff = 0.0;
1258     char * label = NULL;
1259 
1260     if( opal_cr_timing_target_rank != opal_cr_timing_my_rank ) {
1261         return;
1262     }
1263 
1264     opal_output(0, "OPAL CR Timing: ******************** Summary Begin\n");
1265 
1266     /********** Entry into the system **********/
1267     label = strdup("Start Entry Point");
1268     if( opal_cr_timing_barrier_enabled ) {
1269         diff = timer_start[OPAL_CR_TIMER_CRCPBR0] - timer_start[OPAL_CR_TIMER_ENTRY0];
1270     } else {
1271         diff = timer_start[OPAL_CR_TIMER_CRCP0]   - timer_start[OPAL_CR_TIMER_ENTRY0];
1272     }
1273     display_indv_timer_core(diff, label);
1274     free(label);
1275 
1276     /********** CRCP Protocol **********/
1277     label = strdup("CRCP Protocol");
1278     if( opal_cr_timing_barrier_enabled ) {
1279         diff = timer_start[OPAL_CR_TIMER_CRCPBR1] - timer_start[OPAL_CR_TIMER_CRCP0];
1280     } else {
1281         diff = timer_start[OPAL_CR_TIMER_P2P0]    - timer_start[OPAL_CR_TIMER_CRCP0];
1282     }
1283     display_indv_timer_core(diff, label);
1284     free(label);
1285 
1286     /********** P2P Suspend **********/
1287     label = strdup("P2P Suspend");
1288     if( opal_cr_timing_barrier_enabled ) {
1289         diff = timer_start[OPAL_CR_TIMER_P2PBR0]     - timer_start[OPAL_CR_TIMER_P2P0];
1290     } else {
1291         diff = timer_start[OPAL_CR_TIMER_CORE0]     - timer_start[OPAL_CR_TIMER_P2P0];
1292     }
1293     display_indv_timer_core(diff, label);
1294     free(label);
1295 
1296     /********** Checkpoint to Disk  **********/
1297     label = strdup("Checkpoint");
1298     diff = timer_start[OPAL_CR_TIMER_CORE1]    - timer_start[OPAL_CR_TIMER_CORE0];
1299     display_indv_timer_core(diff, label);
1300     free(label);
1301 
1302     /********** P2P Reactivation **********/
1303     label = strdup("P2P Reactivation");
1304     if( opal_cr_timing_barrier_enabled ) {
1305         diff = timer_start[OPAL_CR_TIMER_P2PBR2] - timer_start[OPAL_CR_TIMER_CORE1];
1306     } else {
1307         diff = timer_start[OPAL_CR_TIMER_CRCP1]  - timer_start[OPAL_CR_TIMER_CORE1];
1308     }
1309     display_indv_timer_core(diff, label);
1310     free(label);
1311 
1312     /********** CRCP Protocol Finalize **********/
1313     label = strdup("CRCP Cleanup");
1314     if( opal_cr_timing_barrier_enabled ) {
1315         diff = timer_start[OPAL_CR_TIMER_COREBR1] - timer_start[OPAL_CR_TIMER_CRCP1];
1316     } else {
1317         diff = timer_start[OPAL_CR_TIMER_CORE2]   - timer_start[OPAL_CR_TIMER_CRCP1];
1318     }
1319     display_indv_timer_core(diff, label);
1320     free(label);
1321 
1322     /********** Exit the system **********/
1323     label = strdup("Finish Entry Point");
1324     diff = timer_start[OPAL_CR_TIMER_ENTRY4] - timer_start[OPAL_CR_TIMER_CORE2];
1325     display_indv_timer_core(diff, label);
1326     free(label);
1327 
1328     opal_output(0, "OPAL CR Timing: ******************** Summary End\n");
1329 }
1330 
1331 #if OPAL_ENABLE_CRDEBUG == 1
1332 int opal_cr_debug_set_current_ckpt_thread_self(void)
1333 {
1334     int t;
1335 
1336     if( NULL == opal_cr_debug_free_threads ) {
1337         opal_cr_debug_num_free_threads = 3;
1338         opal_cr_debug_free_threads = (opal_thread_t **)malloc(sizeof(opal_thread_t *) * opal_cr_debug_num_free_threads );
1339         for(t = 0; t < opal_cr_debug_num_free_threads; ++t ) {
1340             opal_cr_debug_free_threads[t] = NULL;
1341         }
1342     }
1343 
1344     opal_cr_debug_free_threads[0] = opal_thread_get_self();
1345 
1346     return OPAL_SUCCESS;
1347 }
1348 
1349 int opal_cr_debug_clear_current_ckpt_thread(void)
1350 {
1351     opal_cr_debug_free_threads[0] = NULL;
1352 
1353     return OPAL_SUCCESS;
1354 }
1355 
1356 int MPIR_checkpoint_debugger_detach(void) {
1357     /* This function is meant to be a noop function for checkpoint/restart
1358      * enabled debugging functionality */
1359 #if 0
1360     /* Once the debugger can successfully force threads into the function below,
1361      * then we can uncomment this line */
1362     if( MPIR_debug_with_checkpoint ) {
1363         opal_cr_debug_threads_already_waiting = true;
1364     }
1365 #endif
1366     return OPAL_SUCCESS;
1367 }
1368 
1369 void MPIR_checkpoint_debugger_signal_handler(int signo)
1370 {
1371     opal_output_verbose(1, opal_cr_output,
1372                         "crs: MPIR_checkpoint_debugger_signal_handler(): Enter Debug signal handler...");
1373 
1374     MPIR_checkpoint_debugger_waitpoint();
1375 
1376     opal_output_verbose(1, opal_cr_output,
1377                         "crs: MPIR_checkpoint_debugger_signal_handler(): Leave Debug signal handler...");
1378 }
1379 
1380 void *MPIR_checkpoint_debugger_waitpoint(void)
1381 {
1382     int t;
1383     opal_thread_t *thr = NULL;
1384 
1385     thr = opal_thread_get_self();
1386 
1387     /*
1388      * Sanity check, if the debugger is not going to attach, then do not wait
1389      * Make sure to open the debug gate, so that threads can get out
1390      */
1391     if( !MPIR_debug_with_checkpoint ) {
1392         opal_output_verbose(1, opal_cr_output,
1393                             "crs: MPIR_checkpoint_debugger_waitpoint(): Debugger is not attaching... (%d)",
1394                             (int)thr->t_handle);
1395         MPIR_checkpoint_debug_gate = 1;
1396         return NULL;
1397     }
1398     else {
1399         opal_output_verbose(1, opal_cr_output,
1400                             "crs: MPIR_checkpoint_debugger_waitpoint(): Waiting for the Debugger to attach... (%d)",
1401                             (int)thr->t_handle);
1402         MPIR_checkpoint_debug_gate = 0;
1403     }
1404 
1405     /*
1406      * Let special threads escape without waiting, they will wait later
1407      */
1408     for(t = 0; t < opal_cr_debug_num_free_threads; ++t) {
1409         if( opal_cr_debug_free_threads[t] != NULL &&
1410             opal_thread_self_compare(opal_cr_debug_free_threads[t]) ) {
1411             opal_output_verbose(1, opal_cr_output,
1412                                 "crs: MPIR_checkpoint_debugger_waitpoint(): Checkpointing thread does not wait here... (%d)",
1413                                 (int)thr->t_handle);
1414             return NULL;
1415         }
1416     }
1417 
1418     /*
1419      * Force all other threads into the waiting function,
1420      * unless they are already in there, then just return so we do not nest
1421      * calls into this wait function and potentially confuse the debugger.
1422      */
1423     if( opal_cr_debug_threads_already_waiting ) {
1424         opal_output_verbose(1, opal_cr_output,
1425                             "crs: MPIR_checkpoint_debugger_waitpoint(): Threads are already waiting from debugger detach, do not wait here... (%d)",
1426                             (int)thr->t_handle);
1427         return NULL;
1428     } else {
1429         opal_output_verbose(1, opal_cr_output,
1430                             "crs: MPIR_checkpoint_debugger_waitpoint(): Wait... (%d)",
1431                             (int)thr->t_handle);
1432         return MPIR_checkpoint_debugger_breakpoint();
1433     }
1434 }
1435 
1436 /*
1437  * A tight loop to wait for debugger to release this process from the
1438  * breakpoint.
1439  */
1440 void *MPIR_checkpoint_debugger_breakpoint(void)
1441 {
1442     /* spin until debugger attaches and releases us */
1443     while (MPIR_checkpoint_debug_gate == 0) {
1444 #if defined(HAVE_USLEEP)
1445         usleep(100000); /* microseconds */
1446 #else
1447         sleep(1);       /* seconds */
1448 #endif
1449     }
1450     opal_cr_debug_threads_already_waiting = false;
1451     return NULL;
1452 }
1453 #endif

/* [<][>][^][v][top][bottom][index][help] */