root/opal/mca/crs/base/crs_base_fns.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. opal_crs_base_construct
  2. opal_crs_base_destruct
  3. opal_crs_base_ckpt_options_construct
  4. opal_crs_base_ckpt_options_destruct
  5. opal_crs_base_metadata_read_token
  6. opal_crs_base_extract_expected_component
  7. opal_crs_base_cleanup_append
  8. opal_crs_base_cleanup_flush
  9. opal_crs_base_state_str
  10. opal_crs_base_copy_options
  11. opal_crs_base_clear_options
  12. opal_crs_base_self_register_checkpoint_callback
  13. opal_crs_base_self_register_restart_callback
  14. opal_crs_base_self_register_continue_callback
  15. metadata_extract_next_token

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2010 The Trustees of Indiana University.
   4  *                         All rights reserved.
   5  * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
   6  *                         All rights reserved.
   7  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   8  *                         University of Stuttgart.  All rights reserved.
   9  * Copyright (c) 2004-2005 The Regents of the University of California.
  10  *                         All rights reserved.
  11  * Copyright (c) 2007      Evergrid, Inc. All rights reserved.
  12  * Copyright (c) 2015      Cisco Systems, Inc.  All rights reserved.
  13  * Copyright (c) 2015      Research Organization for Information Science
  14  *                         and Technology (RIST). All rights reserved.
  15  * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
  16  *                         reserved.
  17  * Copyright (c) 2017      IBM Corporation. All rights reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  */
  24 
  25 #include "opal_config.h"
  26 
  27 #include <string.h>
  28 #ifdef HAVE_SYS_TYPES_H
  29 #include <sys/types.h>
  30 #endif
  31 #ifdef HAVE_UNISTD_H
  32 #include <unistd.h>
  33 #endif
  34 #ifdef HAVE_FCNTL_H
  35 #include <fcntl.h>
  36 #endif  /* HAVE_FCNTL_H */
  37 #ifdef HAVE_SYS_STAT_H
  38 #include <sys/stat.h>
  39 #endif
  40 
  41 #include "opal/mca/mca.h"
  42 #include "opal/mca/base/base.h"
  43 #include "opal/constants.h"
  44 #include "opal/util/os_dirpath.h"
  45 #include "opal/util/output.h"
  46 #include "opal/util/argv.h"
  47 
  48 #include "opal/mca/crs/crs.h"
  49 #include "opal/mca/crs/base/base.h"
  50 
/*
 * Callback pointers for the three "self" events (checkpoint, restart,
 * continue).  They start out unset (NULL) and are assigned by the
 * opal_crs_base_self_register_*_callback() functions in this file.
 */
opal_crs_base_self_checkpoint_fn_t ompi_crs_base_self_checkpoint_fn = NULL;
opal_crs_base_self_restart_fn_t    ompi_crs_base_self_restart_fn = NULL;
opal_crs_base_self_continue_fn_t   ompi_crs_base_self_continue_fn = NULL;
  54 
  55 /******************
  56  * Local Functions
  57  ******************/
/* Forward declaration; the definition is at the end of this file. */
static int metadata_extract_next_token(FILE *file, char **token, char **value);

/*
 * Pending cleanup lists.  opal_crs_base_cleanup_append() adds entries to
 * one of them (files vs. directories); opal_crs_base_cleanup_flush()
 * removes every listed entry, frees the list and resets it to NULL.
 */
static char **cleanup_file_argv = NULL;
static char **cleanup_dir_argv = NULL;
  62 
  63 /******************
  64  * Object stuff
  65  ******************/
  66 static void opal_crs_base_construct(opal_crs_base_snapshot_t *snapshot)
  67 {
  68     snapshot->component_name     = NULL;
  69 
  70     snapshot->metadata_filename  = NULL;
  71     snapshot->metadata           = NULL;
  72     snapshot->snapshot_directory = NULL;
  73 
  74     snapshot->cold_start      = false;
  75 }
  76 
  77 static void opal_crs_base_destruct( opal_crs_base_snapshot_t *snapshot)
  78 {
  79     if(NULL != snapshot->metadata_filename ) {
  80         free(snapshot->metadata_filename);
  81         snapshot->metadata_filename = NULL;
  82     }
  83 
  84     if(NULL != snapshot->metadata) {
  85         fclose(snapshot->metadata);
  86         snapshot->metadata = NULL;
  87     }
  88 
  89     if(NULL != snapshot->snapshot_directory ) {
  90        free(snapshot->snapshot_directory);
  91        snapshot->snapshot_directory = NULL;
  92     }
  93 }
  94 
/*
 * Class instance for opal_crs_base_snapshot_t: parent class
 * opal_list_item_t, using the constructor/destructor pair defined
 * directly above.
 */
OBJ_CLASS_INSTANCE(opal_crs_base_snapshot_t,
                   opal_list_item_t,
                   opal_crs_base_construct,
                   opal_crs_base_destruct);
  99 
 100 static void opal_crs_base_ckpt_options_construct(opal_crs_base_ckpt_options_t *opts) {
 101     opal_crs_base_clear_options(opts);
 102 }
 103 
 104 static void opal_crs_base_ckpt_options_destruct(opal_crs_base_ckpt_options_t *opts) {
 105     opal_crs_base_clear_options(opts);
 106 }
 107 
/*
 * Class instance for opal_crs_base_ckpt_options_t: parent class
 * opal_object_t, using the constructor/destructor pair defined directly
 * above (both simply clear the options).
 */
OBJ_CLASS_INSTANCE(opal_crs_base_ckpt_options_t,
                   opal_object_t,
                   opal_crs_base_ckpt_options_construct,
                   opal_crs_base_ckpt_options_destruct);
 112 
 113 /*
 114  * Utility functions
 115  */
 116 int opal_crs_base_metadata_read_token(FILE *metadata, char * token, char ***value) {
 117     int argc = 0;
 118 
 119     /* Dummy check */
 120     if (NULL == token || NULL == metadata) {
 121         return OPAL_ERROR;
 122     }
 123 
 124     /*
 125      * Extract each token and make the records
 126      */
 127     rewind(metadata);
 128     do {
 129         char *loc_token = NULL, *loc_value = NULL;
 130 
 131         /* Get next token */
 132         if( OPAL_SUCCESS != metadata_extract_next_token(metadata, &loc_token, &loc_value) ) {
 133             break;
 134         }
 135 
 136         /* Check token to see if it matches */
 137         if(0 == strncmp(token, loc_token, strlen(loc_token)) ) {
 138             opal_argv_append(&argc, value, loc_value);
 139         }
 140 
 141         free (loc_token);
 142         free (loc_value);
 143     } while (0 == feof(metadata));
 144 
 145     return OPAL_SUCCESS;
 146 }
 147 
 148 int opal_crs_base_extract_expected_component(FILE *metadata, char ** component_name, int *prev_pid)
 149 {
 150     int exit_status = OPAL_SUCCESS;
 151     char **pid_argv = NULL;
 152     char **name_argv = NULL;
 153 
 154     /* Dummy check */
 155     if( NULL == metadata ) {
 156         exit_status = OPAL_ERROR;
 157         goto cleanup;
 158     }
 159 
 160     opal_crs_base_metadata_read_token(metadata, CRS_METADATA_PID, &pid_argv);
 161     if( NULL != pid_argv && NULL != pid_argv[0] ) {
 162         *prev_pid = atoi(pid_argv[0]);
 163     } else {
 164         opal_output(0, "Error: expected_component: PID information unavailable!");
 165         exit_status = OPAL_ERROR;
 166         goto cleanup;
 167     }
 168 
 169     opal_crs_base_metadata_read_token(metadata, CRS_METADATA_COMP, &name_argv);
 170     if( NULL != name_argv && NULL != name_argv[0] ) {
 171         *component_name = strdup(name_argv[0]);
 172     } else {
 173         opal_output(0, "Error: expected_component: Component Name information unavailable!");
 174         exit_status = OPAL_ERROR;
 175         goto cleanup;
 176     }
 177 
 178  cleanup:
 179     if( NULL != pid_argv ) {
 180         opal_argv_free(pid_argv);
 181         pid_argv = NULL;
 182     }
 183 
 184     if( NULL != name_argv ) {
 185         opal_argv_free(name_argv);
 186         name_argv = NULL;
 187     }
 188 
 189     return exit_status;
 190 }
 191 
 192 int opal_crs_base_cleanup_append(char* filename, bool is_dir)
 193 {
 194     if( NULL == filename ) {
 195         return OPAL_SUCCESS;
 196     }
 197 
 198     if( is_dir ) {
 199         opal_output_verbose(15, opal_crs_base_framework.framework_output,
 200                             "opal:crs: cleanup_append: Append Dir  <%s>\n",
 201                             filename);
 202         opal_argv_append_nosize(&cleanup_dir_argv, filename);
 203     } else {
 204         opal_output_verbose(15, opal_crs_base_framework.framework_output,
 205                             "opal:crs: cleanup_append: Append File <%s>\n",
 206                             filename);
 207         opal_argv_append_nosize(&cleanup_file_argv, filename);
 208     }
 209 
 210     return OPAL_SUCCESS;
 211 }
 212 
 213 int opal_crs_base_cleanup_flush(void)
 214 {
 215     int argc, i;
 216 
 217     /*
 218      * Cleanup files first
 219      */
 220     if( NULL != cleanup_file_argv ) {
 221         argc = opal_argv_count(cleanup_file_argv);
 222         for( i = 0; i < argc; ++i) {
 223             opal_output_verbose(15, opal_crs_base_framework.framework_output,
 224                                 "opal:crs: cleanup_flush: Remove File <%s>\n", cleanup_file_argv[i]);
 225             unlink(cleanup_file_argv[i]);
 226         }
 227 
 228         opal_argv_free(cleanup_file_argv);
 229         cleanup_file_argv = NULL;
 230     }
 231 
 232     /*
 233      * Try to cleanup directories next
 234      */
 235     if( NULL != cleanup_dir_argv ) {
 236         argc = opal_argv_count(cleanup_dir_argv);
 237         for( i = 0; i < argc; ++i) {
 238             opal_output_verbose(15, opal_crs_base_framework.framework_output,
 239                                 "opal:crs: cleanup_flush: Remove Dir  <%s>\n", cleanup_dir_argv[i]);
 240             opal_os_dirpath_destroy(cleanup_dir_argv[i], true, NULL);
 241         }
 242 
 243         opal_argv_free(cleanup_dir_argv);
 244         cleanup_dir_argv = NULL;
 245     }
 246 
 247     return OPAL_SUCCESS;
 248 }
 249 
 250 char * opal_crs_base_state_str(opal_crs_state_type_t state)
 251 {
 252     char *str = NULL;
 253 
 254     switch(state) {
 255     case OPAL_CRS_CHECKPOINT:
 256         str = strdup("Checkpoint");
 257         break;
 258     case OPAL_CRS_RESTART:
 259         str = strdup("Restart");
 260         break;
 261     case OPAL_CRS_CONTINUE:
 262         str = strdup("Continue");
 263         break;
 264     case OPAL_CRS_TERM:
 265         str = strdup("Terminate");
 266         break;
 267     case OPAL_CRS_RUNNING:
 268         str = strdup("Running");
 269         break;
 270     case OPAL_CRS_ERROR:
 271         str = strdup("Error");
 272         break;
 273     default:
 274         str = strdup("Unknown");
 275         break;
 276     }
 277 
 278     return str;
 279 }
 280 
 281 int opal_crs_base_copy_options(opal_crs_base_ckpt_options_t *from,
 282                                  opal_crs_base_ckpt_options_t *to)
 283 {
 284     if( NULL == from ) {
 285         opal_output(opal_crs_base_framework.framework_output,
 286                     "opal:crs:base: copy_options: Error: from value is NULL\n");
 287         return OPAL_ERROR;
 288     }
 289 
 290     if( NULL == to ) {
 291         opal_output(opal_crs_base_framework.framework_output,
 292                     "opal:crs:base: copy_options: Error: to value is NULL\n");
 293         return OPAL_ERROR;
 294     }
 295 
 296     to->term = from->term;
 297     to->stop = from->stop;
 298 
 299     to->inc_prep_only    = from->inc_prep_only;
 300     to->inc_recover_only = from->inc_recover_only;
 301 
 302 #if OPAL_ENABLE_CRDEBUG == 1
 303     to->attach_debugger = from->attach_debugger;
 304     to->detach_debugger = from->detach_debugger;
 305 #endif
 306 
 307     return OPAL_SUCCESS;
 308 }
 309 
 310 int opal_crs_base_clear_options(opal_crs_base_ckpt_options_t *target)
 311 {
 312     if( NULL == target ) {
 313         opal_output(opal_crs_base_framework.framework_output,
 314                     "opal:crs:base: copy_options: Error: target value is NULL\n");
 315         return OPAL_ERROR;
 316     }
 317 
 318     target->term = false;
 319     target->stop = false;
 320 
 321     target->inc_prep_only = false;
 322     target->inc_recover_only = false;
 323 
 324 #if OPAL_ENABLE_CRDEBUG == 1
 325     target->attach_debugger = false;
 326     target->detach_debugger = false;
 327 #endif
 328 
 329     return OPAL_SUCCESS;
 330 }
 331 
 332 int opal_crs_base_self_register_checkpoint_callback(opal_crs_base_self_checkpoint_fn_t  function)
 333 {
 334     ompi_crs_base_self_checkpoint_fn = function;
 335     return OPAL_SUCCESS;
 336 }
 337 
 338 int opal_crs_base_self_register_restart_callback(opal_crs_base_self_restart_fn_t  function)
 339 {
 340     ompi_crs_base_self_restart_fn = function;
 341     return OPAL_SUCCESS;
 342 }
 343 
 344 int opal_crs_base_self_register_continue_callback(opal_crs_base_self_continue_fn_t  function)
 345 {
 346     ompi_crs_base_self_continue_fn = function;
 347     return OPAL_SUCCESS;
 348 }
 349 
 350 
 351 /******************
 352  * Local Functions
 353  ******************/
 354 static int metadata_extract_next_token(FILE *file, char **token, char **value)
 355 {
 356     int exit_status = OPAL_SUCCESS;
 357     const int max_len = 256;
 358     /* NTH: as long as max_len remains small (256 bytes) there is no need
 359      * to allocate line on the heap */
 360     char line[256];
 361     int line_len = 0, value_len;
 362     char *local_value = NULL;
 363     bool end_of_line = false;
 364     char *tmp;
 365 
 366     /*
 367      * If we are at the end of the file, then just return
 368      */
 369     do {
 370         /*
 371          * Other wise grab the next token/value pair
 372          */
 373         if (NULL == fgets(line, max_len, file) ) {
 374             /* the calling code doesn't distinguish error types so
 375              * returning OPAL_ERROR on error or EOF is ok. if this
 376              * changes re-add the check for EOF. */
 377             return OPAL_ERROR;
 378         }
 379 
 380         line_len = strlen(line);
 381 
 382         /* Strip off the new line if it is there */
 383         end_of_line = ('\n' == line[line_len-1]);
 384 
 385         if (end_of_line) {
 386             line[--line_len] = '\0';
 387         }
 388 
 389         /* Ignore lines with just '#' too */
 390     } while (line_len <= 2);
 391 
 392     /*
 393      * Extract the token from the set
 394      */
 395     tmp = strchr (line, ':');
 396     if (!tmp) {
 397         /* no separator */
 398         return OPAL_ERROR;
 399     }
 400 
 401     *tmp = '\0';
 402 
 403     *token = strdup (line);
 404     if (NULL == *token) {
 405         return OPAL_ERR_OUT_OF_RESOURCE;
 406     }
 407     local_value = strdup (tmp + 1);
 408     if (NULL == local_value) {
 409         free(*token);
 410         *token = NULL;
 411         return OPAL_ERR_OUT_OF_RESOURCE;
 412     }
 413 
 414     value_len = strlen (local_value) + 1;
 415 
 416     /*
 417      * Extract the value from the set
 418      */
 419     while(!end_of_line) {
 420         if (NULL == fgets(line, max_len, file) ) {
 421             exit_status = OPAL_ERROR;
 422             break;
 423         }
 424 
 425         line_len = strlen(line);
 426 
 427         /* Strip off the new line if it is there */
 428         end_of_line = ('\n' == line[line_len-1]);
 429 
 430         if (end_of_line) {
 431             line[--line_len] = '\0';
 432         }
 433 
 434         value_len += line_len;
 435 
 436         tmp = (char *) realloc(local_value, value_len);
 437         if (NULL == tmp) {
 438             exit_status = OPAL_ERR_OUT_OF_RESOURCE;
 439             break;
 440         }
 441         local_value = tmp;
 442 
 443         strcat (local_value, line);
 444     }
 445 
 446     if (OPAL_SUCCESS == exit_status) {
 447         *value = local_value;
 448     } else {
 449         free (local_value);
 450     }
 451 
 452     return exit_status;
 453 }

/* [<][>][^][v][top][bottom][index][help] */