root/orte/mca/plm/plm_types.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2008 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2011-2012 Los Alamos National Security, LLC.  All rights
  13  *                         reserved.
  14  * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
  15  * $COPYRIGHT$
  16  *
  17  * Additional copyrights may follow
  18  *
  19  * $HEADER$
  20  */
  21 
  22 #ifndef ORTE_PLM_TYPES_H
  23 #define ORTE_PLM_TYPES_H
  24 
  25 #include "orte_config.h"
  26 #include "orte/types.h"
  27 
  28 
  29 
  30 BEGIN_C_DECLS
  31 
  32 /*
  33  * Process exit codes - held in a signed 32-bit integer
  34  */
  35 
  36 typedef int32_t orte_exit_code_t;
  37 #define ORTE_EXIT_CODE_T OPAL_INT32  /* OPAL_INT32 is defined outside this header; presumably the data-type tag matching the typedef above - TODO confirm */
  38 
  39 /*
  40  * Process state codes - held in an unsigned 32-bit integer and laid out in ascending numeric ranges: not-yet-terminated states, TERMINATED, then error states
  41  */
  42 
  43 typedef uint32_t orte_proc_state_t;
  44 #define ORTE_PROC_STATE_T   OPAL_UINT32
  45 #define ORTE_PROC_STATE_ANY 0xffff  /* presumably a wildcard that matches any state - TODO confirm; note the value is only 16 bits of ones although the type is 32 bits wide */
  46 
  47 #define ORTE_PROC_STATE_UNDEF                    0  /* undefined process state */
  48 #define ORTE_PROC_STATE_INIT                     1  /* process entry has been created by rmaps */
  49 #define ORTE_PROC_STATE_RESTART                  2  /* the proc is ready for restart */
  50 #define ORTE_PROC_STATE_TERMINATE                3  /* process is marked for termination */
  51 #define ORTE_PROC_STATE_RUNNING                  4  /* daemon has locally fork'd process */
  52 #define ORTE_PROC_STATE_REGISTERED               5  /* proc registered sync */
  53 #define ORTE_PROC_STATE_IOF_COMPLETE             6  /* io forwarding pipes have closed */
  54 #define ORTE_PROC_STATE_WAITPID_FIRED            7  /* waitpid fired on process */
  55 #define ORTE_PROC_STATE_MODEX_READY              8  /* all modex info has been stored */
  56 /*
  57  * Define a "boundary" so we can easily and quickly determine
  58  * if a proc is still running or not - any value less than
  59  * this one means that the proc has not terminated
  60  */
  61 #define ORTE_PROC_STATE_UNTERMINATED            15
  62 
  63 #define ORTE_PROC_STATE_TERMINATED              20  /* process has terminated and is no longer running */
  64 /* Define a boundary so we can easily and quickly determine
  65  * if a proc abnormally terminated (every specific error state below
  66  * is an offset above this value) - leave a little room for future expansion
  67  */
  68 #define ORTE_PROC_STATE_ERROR                   50
  69 /* Define specific error code values, each as an offset from ORTE_PROC_STATE_ERROR */
  70 #define ORTE_PROC_STATE_KILLED_BY_CMD           (ORTE_PROC_STATE_ERROR +  1)  /* process was killed by ORTE cmd */
  71 #define ORTE_PROC_STATE_ABORTED                 (ORTE_PROC_STATE_ERROR +  2)  /* process aborted */
  72 #define ORTE_PROC_STATE_FAILED_TO_START         (ORTE_PROC_STATE_ERROR +  3)  /* process failed to start */
  73 #define ORTE_PROC_STATE_ABORTED_BY_SIG          (ORTE_PROC_STATE_ERROR +  4)  /* process aborted by signal */
  74 #define ORTE_PROC_STATE_TERM_WO_SYNC            (ORTE_PROC_STATE_ERROR +  5)  /* process exit'd w/o required sync */
  75 #define ORTE_PROC_STATE_COMM_FAILED             (ORTE_PROC_STATE_ERROR +  6)  /* process communication has failed */
  76 #define ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED   (ORTE_PROC_STATE_ERROR +  7)  /* process exceeded a sensor limit */
  77 #define ORTE_PROC_STATE_CALLED_ABORT            (ORTE_PROC_STATE_ERROR +  8)  /* process called "errmgr.abort" */
  78 #define ORTE_PROC_STATE_HEARTBEAT_FAILED        (ORTE_PROC_STATE_ERROR +  9)  /* heartbeat failed to arrive */
  79 #define ORTE_PROC_STATE_MIGRATING               (ORTE_PROC_STATE_ERROR + 10)  /* process failed and is waiting for resources before restarting */
  80 #define ORTE_PROC_STATE_CANNOT_RESTART          (ORTE_PROC_STATE_ERROR + 11)  /* process failed and cannot be restarted */
  81 #define ORTE_PROC_STATE_TERM_NON_ZERO           (ORTE_PROC_STATE_ERROR + 12)  /* process exited with a non-zero status, indicating abnormal termination */
  82 #define ORTE_PROC_STATE_FAILED_TO_LAUNCH        (ORTE_PROC_STATE_ERROR + 13)  /* unable to launch process */
  83 #define ORTE_PROC_STATE_UNABLE_TO_SEND_MSG      (ORTE_PROC_STATE_ERROR + 14)  /* unable to send a message */
  84 #define ORTE_PROC_STATE_LIFELINE_LOST           (ORTE_PROC_STATE_ERROR + 15)  /* connection to lifeline lost */
  85 #define ORTE_PROC_STATE_NO_PATH_TO_TARGET       (ORTE_PROC_STATE_ERROR + 16)  /* no path for communicating to target peer */
  86 #define ORTE_PROC_STATE_FAILED_TO_CONNECT       (ORTE_PROC_STATE_ERROR + 17)  /* unable to connect to target peer */
  87 #define ORTE_PROC_STATE_PEER_UNKNOWN            (ORTE_PROC_STATE_ERROR + 18)  /* unknown peer */
  88 
  89 /* Define a boundary so that external developers
  90  * have a starting point for defining their own
  91  * proc states (the highest value defined in this header is ORTE_PROC_STATE_ERROR + 18)
  92  */
  93 #define ORTE_PROC_STATE_DYNAMIC 100
  94 
  95 /*
  96  * App_context state codes - held in a signed 32-bit integer, numbered consecutively from 0
  97  */
  98 typedef int32_t orte_app_state_t;
  99 #define ORTE_APP_STATE_T    OPAL_INT32
 100 
 101 #define ORTE_APP_STATE_UNDEF                0
 102 #define ORTE_APP_STATE_INIT                 1
 103 #define ORTE_APP_STATE_ALL_MAPPED           2
 104 #define ORTE_APP_STATE_RUNNING              3
 105 #define ORTE_APP_STATE_COMPLETED            4
 106 
 107 /*
 108  * Job state codes - held in a signed 32-bit integer and laid out in ascending numeric ranges: launch/run states, termination states, then the ORTE_JOB_STATE_ERROR offset range
 109  */
 110 
 111 typedef int32_t orte_job_state_t;
 112 #define ORTE_JOB_STATE_T    OPAL_INT32
 113 #define ORTE_JOB_STATE_ANY  INT_MAX  /* NOTE(review): INT_MAX comes from <limits.h>, which this header does not include directly - presumably provided via orte_config.h or orte/types.h; TODO confirm */
 114 
 115 #define ORTE_JOB_STATE_UNDEF                     0
 116 #define ORTE_JOB_STATE_INIT                      1  /* ready to be assigned id */
 117 #define ORTE_JOB_STATE_INIT_COMPLETE             2  /* jobid assigned and setup */
 118 #define ORTE_JOB_STATE_ALLOCATE                  3  /* ready to be allocated */
 119 #define ORTE_JOB_STATE_ALLOCATION_COMPLETE       4  /* allocation completed */
 120 #define ORTE_JOB_STATE_MAP                       5  /* ready to be mapped */
 121 #define ORTE_JOB_STATE_MAP_COMPLETE              6  /* mapping complete */
 122 #define ORTE_JOB_STATE_SYSTEM_PREP               7  /* ready for final sanity check and system values updated */
 123 #define ORTE_JOB_STATE_LAUNCH_DAEMONS            8  /* ready to launch daemons */
 124 #define ORTE_JOB_STATE_DAEMONS_LAUNCHED          9  /* daemons for this job have been launched */
 125 #define ORTE_JOB_STATE_DAEMONS_REPORTED         10  /* all launched daemons have reported */
 126 #define ORTE_JOB_STATE_VM_READY                 11  /* the VM is ready for operation */
 127 #define ORTE_JOB_STATE_LAUNCH_APPS              12  /* ready to launch apps */
 128 #define ORTE_JOB_STATE_SEND_LAUNCH_MSG          13  /* send launch msg to daemons */
 129 #define ORTE_JOB_STATE_RUNNING                  14  /* all procs have been fork'd */
 130 #define ORTE_JOB_STATE_SUSPENDED                15  /* job has been suspended */
 131 #define ORTE_JOB_STATE_REGISTERED               16  /* all procs registered for sync */
 132 #define ORTE_JOB_STATE_READY_FOR_DEBUGGERS      17  /* job ready for debugger init after spawn */
 133 #define ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE    18  /* all local procs have attempted launch */
 134 #define ORTE_JOB_STATE_DEBUGGER_DETACH          19  /* a debugger has detached */
 135 
 136 /*
 137  * Define a "boundary" so we can easily and quickly determine
 138  * if a job is still running or not - any value less than
 139  * this one means that the job has not terminated
 140  */
 141 #define ORTE_JOB_STATE_UNTERMINATED             30
 142 
 143 #define ORTE_JOB_STATE_TERMINATED               31  /* all processes have terminated and job is no longer running */
 144 #define ORTE_JOB_STATE_ALL_JOBS_COMPLETE        32
 145 #define ORTE_JOB_STATE_DAEMONS_TERMINATED       33
 146 #define ORTE_JOB_STATE_NOTIFY_COMPLETED         34  /* callback to notify when job completes */
 147 #define ORTE_JOB_STATE_NOTIFIED                 35
 148 
 149 /* Define a boundary so we can easily and quickly determine
 150  * if a job abnormally terminated (the specific error states below
 151  * are offsets above this value) - leave a little room for future expansion
 152  */
 153 #define ORTE_JOB_STATE_ERROR                   50
 154 /* Define specific error code values, each as an offset from ORTE_JOB_STATE_ERROR */
 155 #define ORTE_JOB_STATE_KILLED_BY_CMD           (ORTE_JOB_STATE_ERROR +  1)  /* job was killed by ORTE cmd */
 156 #define ORTE_JOB_STATE_ABORTED                 (ORTE_JOB_STATE_ERROR +  2)  /* at least one process aborted, causing job to abort */
 157 #define ORTE_JOB_STATE_FAILED_TO_START         (ORTE_JOB_STATE_ERROR +  3)  /* at least one process failed to start */
 158 #define ORTE_JOB_STATE_ABORTED_BY_SIG          (ORTE_JOB_STATE_ERROR +  4)  /* job was killed by a signal */
 159 #define ORTE_JOB_STATE_ABORTED_WO_SYNC         (ORTE_JOB_STATE_ERROR +  5)  /* job was aborted because proc exit'd w/o required sync */
 160 #define ORTE_JOB_STATE_COMM_FAILED             (ORTE_JOB_STATE_ERROR +  6)  /* communication has failed */
 161 #define ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED   (ORTE_JOB_STATE_ERROR +  7)  /* job had a process that exceeded a sensor limit */
 162 #define ORTE_JOB_STATE_CALLED_ABORT            (ORTE_JOB_STATE_ERROR +  8)  /* at least one process called "errmgr.abort" */
 163 #define ORTE_JOB_STATE_HEARTBEAT_FAILED        (ORTE_JOB_STATE_ERROR +  9)  /* heartbeat failed to arrive */
 164 #define ORTE_JOB_STATE_NEVER_LAUNCHED          (ORTE_JOB_STATE_ERROR + 10)  /* the job never even attempted to launch due to
 165                                                                              * an error earlier in the
 166                                                                              * launch procedure
 167                                                                              */
 168 #define ORTE_JOB_STATE_ABORT_ORDERED           (ORTE_JOB_STATE_ERROR + 11)  /* the processes in this job have been ordered to "die",
 169                                                                              * but may not have completed it yet. Don't order it again
 170                                                                              */
 171 #define ORTE_JOB_STATE_NON_ZERO_TERM           (ORTE_JOB_STATE_ERROR + 12)  /* at least one process exited with non-zero status */
 172 #define ORTE_JOB_STATE_FAILED_TO_LAUNCH        (ORTE_JOB_STATE_ERROR + 13)
 173 #define ORTE_JOB_STATE_FORCED_EXIT             (ORTE_JOB_STATE_ERROR + 14)
 174 #define ORTE_JOB_STATE_SILENT_ABORT            (ORTE_JOB_STATE_ERROR + 16)  /* an error occurred and was reported elsewhere, so error out quietly (note: offset 15 is not assigned in this header) */
 175 
 176 #define ORTE_JOB_STATE_REPORT_PROGRESS         (ORTE_JOB_STATE_ERROR + 17)  /* report launch progress - not an error, although its value lies above ORTE_JOB_STATE_ERROR */
 177 #define ORTE_JOB_STATE_ALLOC_FAILED            (ORTE_JOB_STATE_ERROR + 18)  /* job failed to obtain an allocation */
 178 #define ORTE_JOB_STATE_MAP_FAILED              (ORTE_JOB_STATE_ERROR + 19)  /* job failed to map */
 179 #define ORTE_JOB_STATE_CANNOT_LAUNCH           (ORTE_JOB_STATE_ERROR + 20)  /* resources were busy and so the job cannot be launched */
 180 
 181 /* define the FT events ("FT" presumably means fault tolerance - TODO confirm); these are also numbered as offsets from ORTE_JOB_STATE_ERROR */
 182 #define ORTE_JOB_STATE_FT_CHECKPOINT           (ORTE_JOB_STATE_ERROR + 21)
 183 #define ORTE_JOB_STATE_FT_CONTINUE             (ORTE_JOB_STATE_ERROR + 22)
 184 #define ORTE_JOB_STATE_FT_RESTART              (ORTE_JOB_STATE_ERROR + 23)
 185 
 186 
 187 /* Define a boundary so that external developers
 188  * have a starting point for defining their own
 189  * job states (the highest value defined in this header is ORTE_JOB_STATE_ERROR + 23)
 190  */
 191 #define ORTE_JOB_STATE_DYNAMIC 100
 192 
 193 
 194 /**
 195  * Node State, corresponding to the ORTE_NODE_STATE_* #defines,
 196  * below.  These are #defines instead of an enum because the thought
 197  * is that we may have lots and lots of entries of these in the
 198  * registry and by making this an int8_t, it's only 1 byte, whereas an
 199  * enum defaults to an int (probably 4 bytes).  So it's a bit of a
 200  * space savings.
 201  */
 202 typedef int8_t orte_node_state_t;
 203 #define ORTE_NODE_STATE_T OPAL_INT8
 204 
 205 #define ORTE_NODE_STATE_UNDEF         0  /* Node is undefined */
 206 #define ORTE_NODE_STATE_UNKNOWN       1  /* Node is defined but in an unknown state */
 207 #define ORTE_NODE_STATE_DOWN          2  /* Node is down */
 208 #define ORTE_NODE_STATE_UP            3  /* Node is up / available for use */
 209 #define ORTE_NODE_STATE_REBOOT        4  /* Node is rebooting */
 210 #define ORTE_NODE_STATE_DO_NOT_USE    5  /* Node is up, but not available for use for the next mapping */
 211 #define ORTE_NODE_STATE_NOT_INCLUDED  6  /* Node is up, but not part of the node pool for jobs */
 212 #define ORTE_NODE_STATE_ADDED         7  /* Node was dynamically added to pool */
 213 
 214 /* Define a boundary so that external developers
 215  * have a starting point for defining their own
 216  * node states - because orte_node_state_t is an int8_t, only the values 100 through 127 fit
 217  */
 218 #define ORTE_NODE_STATE_DYNAMIC 100
 219 
 220 /*
 221  * PLM commands - command flags held in an unsigned 8-bit integer
 222  */
 223 typedef uint8_t orte_plm_cmd_flag_t;
 224 #define ORTE_PLM_CMD    OPAL_UINT8  /* OPAL_UINT8 is defined outside this header; presumably the data-type tag matching the typedef above - TODO confirm */
 225 #define ORTE_PLM_LAUNCH_JOB_CMD         1
 226 #define ORTE_PLM_UPDATE_PROC_STATE      2
 227 #define ORTE_PLM_REGISTERED_CMD         3
 228 
 229 END_C_DECLS
 230 
 231 #endif

/* [<][>][^][v][top][bottom][index][help] */