root/orte/mca/snapc/full/snapc_full_module.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. orte_snapc_full_orted_construct
  2. orte_snapc_full_orted_destruct
  3. orte_snapc_full_app_construct
  4. orte_snapc_full_app_destruct
  5. orte_snapc_full_component_query
  6. orte_snapc_full_module_init
  7. orte_snapc_full_module_finalize
  8. orte_snapc_full_setup_job
  9. orte_snapc_full_release_job
  10. orte_snapc_full_ft_event
  11. orte_snapc_full_start_ckpt
  12. orte_snapc_full_end_ckpt
  13. orte_snapc_full_request_op

   1 /*
   2  * Copyright (c) 2004-2012 The Trustees of Indiana University.
   3  *                         All rights reserved.
   4  * Copyright (c) 2004-2011 The Trustees of the University of Tennessee.
   5  *                         All rights reserved.
   6  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   7  *                         University of Stuttgart.  All rights reserved.
   8  * Copyright (c) 2004-2005 The Regents of the University of California.
   9  *                         All rights reserved.
  10  * $COPYRIGHT$
  11  *
  12  * Additional copyrights may follow
  13  *
  14  * $HEADER$
  15  */
  16 
  17 #include "orte_config.h"
  18 
  19 #include <sys/types.h>
  20 #ifdef HAVE_UNISTD_H
  21 #include <unistd.h>
  22 #endif  /* HAVE_UNISTD_H */
  23 
  24 #include "orte/mca/mca.h"
  25 #include "opal/mca/base/base.h"
  26 
  27 #include "opal/util/output.h"
  28 #include "opal/util/opal_environ.h"
  29 
  30 #include "orte/mca/snapc/snapc.h"
  31 #include "orte/mca/snapc/base/base.h"
  32 
  33 #include "snapc_full.h"
  34 
  35 /*
  36  * Full module
  37  */
  38 static orte_snapc_base_module_t loc_module = {
  39     /** Initialization Function */
  40     orte_snapc_full_module_init,
  41     /** Finalization Function */
  42     orte_snapc_full_module_finalize,
  43     orte_snapc_full_setup_job,
  44     orte_snapc_full_release_job,
  45     orte_snapc_full_ft_event,
  46     orte_snapc_full_start_ckpt,
  47     orte_snapc_full_end_ckpt,
  48     orte_snapc_full_request_op
  49 };
  50 
  51 /*
  52  * Global Snapshot structure
  53  */
  54 void orte_snapc_full_orted_construct(orte_snapc_full_orted_snapshot_t *obj);
  55 void orte_snapc_full_orted_destruct( orte_snapc_full_orted_snapshot_t *obj);
  56 
  57 OBJ_CLASS_INSTANCE(orte_snapc_full_orted_snapshot_t,
  58                    orte_snapc_base_global_snapshot_t,
  59                    orte_snapc_full_orted_construct,
  60                    orte_snapc_full_orted_destruct);
  61 
  62 /*
  63  * Local Snapshot structure
  64  */
  65 void orte_snapc_full_app_construct(orte_snapc_full_app_snapshot_t *obj);
  66 void orte_snapc_full_app_destruct( orte_snapc_full_app_snapshot_t *obj);
  67 
  68 OBJ_CLASS_INSTANCE(orte_snapc_full_app_snapshot_t,
  69                    orte_snapc_base_local_snapshot_t,
  70                    orte_snapc_full_app_construct,
  71                    orte_snapc_full_app_destruct);
  72 
  73 /************************************
  74  * Locally Global vars & functions :)
  75  ************************************/
  76 
  77 
  78 /************************
  79  * Function Definitions
  80  ************************/
  81 void orte_snapc_full_orted_construct(orte_snapc_full_orted_snapshot_t *snapshot) {
  82     snapshot->process_name.jobid  = 0;
  83     snapshot->process_name.vpid   = 0;
  84 
  85     snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE;
  86 }
  87 
  88 void orte_snapc_full_orted_destruct( orte_snapc_full_orted_snapshot_t *snapshot) {
  89     snapshot->process_name.jobid  = 0;
  90     snapshot->process_name.vpid   = 0;
  91 
  92     snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE;
  93 }
  94 
  95 void orte_snapc_full_app_construct(orte_snapc_full_app_snapshot_t *app_snapshot) {
  96     app_snapshot->comm_pipe_r = NULL;
  97     app_snapshot->comm_pipe_w = NULL;
  98 
  99     app_snapshot->comm_pipe_r_fd = -1;
 100     app_snapshot->comm_pipe_w_fd = -1;
 101 
 102     app_snapshot->is_eh_active = false;
 103     app_snapshot->unique_pipe_id = 0;
 104 
 105     app_snapshot->process_pid  = 0;
 106 
 107     app_snapshot->migrating = false;
 108 
 109     app_snapshot->finished = false;
 110 }
 111 
 112 void orte_snapc_full_app_destruct( orte_snapc_full_app_snapshot_t *app_snapshot) {
 113     if( NULL != app_snapshot->comm_pipe_r ) {
 114         free(app_snapshot->comm_pipe_r);
 115         app_snapshot->comm_pipe_r = NULL;
 116     }
 117 
 118     if( NULL != app_snapshot->comm_pipe_w ) {
 119         free(app_snapshot->comm_pipe_w);
 120         app_snapshot->comm_pipe_w = NULL;
 121     }
 122 
 123     app_snapshot->comm_pipe_r_fd = -1;
 124     app_snapshot->comm_pipe_w_fd = -1;
 125 
 126     app_snapshot->is_eh_active = false;
 127     app_snapshot->unique_pipe_id = 0;
 128 
 129     app_snapshot->process_pid  = 0;
 130 
 131     app_snapshot->migrating = false;
 132 
 133     app_snapshot->finished = false;
 134 }
 135 
 136 /*
 137  * MCA Functions
 138  */
 139 int orte_snapc_full_component_query(mca_base_module_t **module, int *priority)
 140 {
 141     opal_output_verbose(10, mca_snapc_full_component.super.output_handle,
 142                         "snapc:full: component_query()");
 143 
 144     *priority = mca_snapc_full_component.super.priority;
 145     *module = (mca_base_module_t *)&loc_module;
 146 
 147     return ORTE_SUCCESS;
 148 }
 149 
 150 int orte_snapc_full_module_init(bool seed, bool app)
 151 {
 152     int ret, exit_status = ORTE_SUCCESS;
 153 
 154     opal_output_verbose(10, mca_snapc_full_component.super.output_handle,
 155                         "snapc:full: module_init(%d, %d)", seed, app);
 156 
 157     /*
 158      * Global Snapshot Coordinator
 159      */
 160     if(seed) {
 161         opal_output_verbose(5, mca_snapc_full_component.super.output_handle,
 162                             "snapc:full: module_init: Global Snapshot Coordinator");
 163 
 164         orte_snapc_coord_type |= ORTE_SNAPC_GLOBAL_COORD_TYPE;
 165 
 166         if( ORTE_SUCCESS != (ret = global_coord_init()) ) {
 167             exit_status = ret;
 168             goto cleanup;
 169         }
 170     }
 171     /*
 172      * Local Snapshot Coordinator -- orted
 173      */
 174     else if(!seed && !app) {
 175         /*
 176          * JJH Currently we are not guarenteed a bootproxy, and we have no way
 177          * JJH (that I know of) to tell if we were generated from the bootproxy
 178          * JJH or from the HNP inside the application.
 179          * JJH so for this component we assume that there exists a local coordinator
 180          */
 181         opal_output_verbose(5, mca_snapc_full_component.super.output_handle,
 182                             "snapc:full: module_init: Local Snapshot Coordinator");
 183 
 184         orte_snapc_coord_type |= ORTE_SNAPC_LOCAL_COORD_TYPE;
 185 
 186         if( ORTE_SUCCESS != (ret = local_coord_init()) ) {
 187             exit_status = ret;
 188             goto cleanup;
 189         }
 190     }
 191     /*
 192      * Application Snapshot Coordinator
 193      */
 194     else if(app) {
 195         /*
 196          * Start the app coordinator only after ORTE has initialized.
 197          */
 198         if(!orte_initialized) {
 199             goto cleanup;
 200         }
 201 
 202         opal_output_verbose(5, mca_snapc_full_component.super.output_handle,
 203                             "snapc:full: module_init: Application Snapshot Coordinator");
 204 
 205         orte_snapc_coord_type |= ORTE_SNAPC_APP_COORD_TYPE;
 206 
 207         if( ORTE_SUCCESS != (ret = app_coord_init()) ) {
 208             exit_status = ret;
 209             goto cleanup;
 210         }
 211     }
 212     else {
 213         /*
 214          * Logically this should not happen
 215          */
 216         opal_output_verbose(5, mca_snapc_full_component.super.output_handle,
 217                             "snapc:full: module_init: Unknown Snapshot Coordinator");
 218 
 219         orte_snapc_coord_type = ORTE_SNAPC_UNASSIGN_TYPE;
 220 
 221         exit_status = ORTE_ERROR;
 222         goto cleanup;
 223     }
 224 
 225  cleanup:
 226     return exit_status;
 227 }
 228 
 229 int orte_snapc_full_module_finalize(void)
 230 {
 231     opal_output_verbose(10, mca_snapc_full_component.super.output_handle,
 232                         "snapc:full: module_finalize()");
 233 
 234     switch(orte_snapc_coord_type)
 235         {
 236         case ORTE_SNAPC_GLOBAL_COORD_TYPE:
 237             global_coord_finalize();
 238             break;
 239         case ORTE_SNAPC_LOCAL_COORD_TYPE:
 240             local_coord_finalize();
 241             break;
 242         case ORTE_SNAPC_APP_COORD_TYPE:
 243             app_coord_finalize();
 244             break;
 245         default:
 246             break;
 247         }
 248 
 249     orte_snapc_coord_type = ORTE_SNAPC_UNASSIGN_TYPE;
 250 
 251     return ORTE_SUCCESS;
 252 }
 253 
 254 int orte_snapc_full_setup_job(orte_jobid_t jobid) {
 255     int ret, exit_status = ORTE_SUCCESS;
 256 
 257     if( ORTE_SNAPC_GLOBAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_GLOBAL_COORD_TYPE)) {
 258         if(ORTE_SUCCESS != (ret = global_coord_setup_job(jobid) ) ) {
 259             exit_status = ret;
 260         }
 261     }
 262     else if( ORTE_SNAPC_LOCAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_LOCAL_COORD_TYPE)) {
 263         if(ORTE_SUCCESS != (ret = local_coord_setup_job(jobid) ) ) {
 264             exit_status = ret;
 265         }
 266     }
 267 
 268     return exit_status;
 269 }
 270 
 271 int orte_snapc_full_release_job(orte_jobid_t jobid) {
 272     int ret, exit_status = ORTE_SUCCESS;
 273 
 274     if( ORTE_SNAPC_GLOBAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_GLOBAL_COORD_TYPE)) {
 275         if(ORTE_SUCCESS != (ret = global_coord_release_job(jobid) ) ) {
 276             exit_status = ret;
 277         }
 278     }
 279     else if( ORTE_SNAPC_LOCAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_LOCAL_COORD_TYPE )) {
 280         if(ORTE_SUCCESS != (ret = local_coord_release_job(jobid) ) ) {
 281             exit_status = ret;
 282         }
 283     }
 284 
 285     return exit_status;
 286 }
 287 
 288 int orte_snapc_full_ft_event(int state) {
 289     switch(orte_snapc_coord_type)
 290         {
 291         case ORTE_SNAPC_GLOBAL_COORD_TYPE:
 292         case ORTE_SNAPC_LOCAL_COORD_TYPE:
 293             ; /* Do nothing */
 294             break;
 295         case ORTE_SNAPC_APP_COORD_TYPE:
 296             return app_coord_ft_event(state);
 297             break;
 298         default:
 299             break;
 300         }
 301 
 302     return ORTE_SUCCESS;
 303 }
 304 
 305 int orte_snapc_full_start_ckpt(orte_snapc_base_quiesce_t *datum)
 306 {
 307     switch(orte_snapc_coord_type)
 308         {
 309         case ORTE_SNAPC_GLOBAL_COORD_TYPE:
 310             return global_coord_start_ckpt(datum);
 311             break;
 312         case ORTE_SNAPC_LOCAL_COORD_TYPE:
 313             ; /* Do nothing */
 314             break;
 315         case ORTE_SNAPC_APP_COORD_TYPE:
 316             ; /* Do nothing. Use app_coord_request_op() instead */
 317             break;
 318         default:
 319             break;
 320         }
 321 
 322     return ORTE_SUCCESS;
 323 }
 324 
 325 int orte_snapc_full_end_ckpt(orte_snapc_base_quiesce_t *datum)
 326 {
 327     switch(orte_snapc_coord_type)
 328         {
 329         case ORTE_SNAPC_GLOBAL_COORD_TYPE:
 330             return global_coord_end_ckpt(datum);
 331             break;
 332         case ORTE_SNAPC_LOCAL_COORD_TYPE:
 333             ; /* Do nothing */
 334             break;
 335         case ORTE_SNAPC_APP_COORD_TYPE:
 336             ; /* Do nothing. Use app_coord_request_op() instead */
 337             break;
 338         default:
 339             break;
 340         }
 341 
 342     return ORTE_SUCCESS;
 343 }
 344 
 345 int orte_snapc_full_request_op(orte_snapc_base_request_op_t *datum)
 346 {
 347     switch(orte_snapc_coord_type)
 348         {
 349         case ORTE_SNAPC_GLOBAL_COORD_TYPE:
 350             ; /* Do nothing */
 351             break;
 352         case ORTE_SNAPC_LOCAL_COORD_TYPE:
 353             ; /* Do nothing */
 354             break;
 355         case ORTE_SNAPC_APP_COORD_TYPE:
 356             return app_coord_request_op(datum);
 357             break;
 358         default:
 359             break;
 360         }
 361 
 362     return ORTE_SUCCESS;
 363 }
 364 
 365 /******************
 366  * Local functions
 367  ******************/

/* [<][>][^][v][top][bottom][index][help] */